aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-02-20 20:52:00 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2017-02-20 20:52:00 +0100
commitce8226f1a7d64da56117d2f7f351e06225a84614 (patch)
tree9f2d716df0230f5f17372f19b8718dcf86039fd9 /src/Text
parente86e44b98e592d5a5e4c6b43d9b57b195f091ed9 (diff)
parent12d96508c62189b4ff8c8b797d34cc9ef177f5ee (diff)
downloadpandoc-ce8226f1a7d64da56117d2f7f351e06225a84614.tar.gz
Merge commit '9e52ac6bb02afd7b4ed5dad61021a1fa33051203' as 'data/templates'
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs380
-rw-r--r--src/Text/Pandoc/App.hs1444
-rw-r--r--src/Text/Pandoc/Asciify.hs422
-rw-r--r--src/Text/Pandoc/CSS.hs43
-rw-r--r--src/Text/Pandoc/Class.hs539
-rw-r--r--src/Text/Pandoc/Compat/Time.hs30
-rw-r--r--src/Text/Pandoc/Data.hsb15
-rw-r--r--src/Text/Pandoc/Emoji.hs906
-rw-r--r--src/Text/Pandoc/Error.hs76
-rw-r--r--src/Text/Pandoc/Extensions.hs267
-rw-r--r--src/Text/Pandoc/Highlighting.hs223
-rw-r--r--src/Text/Pandoc/ImageSize.hs547
-rw-r--r--src/Text/Pandoc/Logging.hs232
-rw-r--r--src/Text/Pandoc/MIME.hs527
-rw-r--r--src/Text/Pandoc/MediaBag.hs113
-rw-r--r--src/Text/Pandoc/Options.hs217
-rw-r--r--src/Text/Pandoc/PDF.hs369
-rw-r--r--src/Text/Pandoc/Parsing.hs1329
-rw-r--r--src/Text/Pandoc/Pretty.hs557
-rw-r--r--src/Text/Pandoc/Process.hs98
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs128
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs1055
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs626
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs154
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs229
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs1044
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs108
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs47
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs279
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs1136
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs160
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1437
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs2119
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs677
-rw-r--r--src/Text/Pandoc/Readers/Native.hs71
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs103
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs113
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs253
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs495
-rw-r--r--src/Text/Pandoc/Readers/Odt/Base.hs43
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs929
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs260
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs62
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs48
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs171
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs1063
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs110
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs744
-rw-r--r--src/Text/Pandoc/Readers/Org.hs62
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs137
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs979
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs172
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs880
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs218
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs259
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs217
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs97
-rw-r--r--src/Text/Pandoc/Readers/RST.hs1354
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs525
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs729
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs596
-rw-r--r--src/Text/Pandoc/SelfContained.hs181
-rw-r--r--src/Text/Pandoc/Shared.hs883
-rw-r--r--src/Text/Pandoc/Slides.hs63
-rw-r--r--src/Text/Pandoc/Templates.hs77
-rw-r--r--src/Text/Pandoc/UTF8.hs121
-rw-r--r--src/Text/Pandoc/UUID.hs78
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs470
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs190
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs481
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs322
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs440
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs1302
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs522
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs1257
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs617
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs1069
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs370
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs584
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs1388
-rw-r--r--src/Text/Pandoc/Writers/Man.hs381
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs1147
-rw-r--r--src/Text/Pandoc/Writers/Math.hs49
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs442
-rw-r--r--src/Text/Pandoc/Writers/Native.hs79
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs210
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs103
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs626
-rw-r--r--src/Text/Pandoc/Writers/Org.hs411
-rw-r--r--src/Text/Pandoc/Writers/RST.hs556
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs412
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs183
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs324
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs498
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs486
-rw-r--r--src/Text/Pandoc/Writers/ZimWiki.hs396
-rw-r--r--src/Text/Pandoc/XML.hs115
97 files changed, 0 insertions, 45056 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
deleted file mode 100644
index 47b891eb3..000000000
--- a/src/Text/Pandoc.hs
+++ /dev/null
@@ -1,380 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, GADTs #-}
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-This helper module exports the main writers, readers, and data
-structure definitions from the Pandoc libraries.
-
-A typical application will chain together a reader and a writer
-to convert strings from one format to another. For example, the
-following simple program will act as a filter converting markdown
-fragments to reStructuredText, using reference-style links instead of
-inline links:
-
-> module Main where
-> import Text.Pandoc
->
-> markdownToRST :: String -> Either PandocError String
-> markdownToRST =
-> writeRST def {writerReferenceLinks = True} . readMarkdown def
->
-> main = getContents >>= either error return markdownToRST >>= putStrLn
-
-Note: all of the readers assume that the input text has @'\n'@
-line endings. So if you get your input text from a web form,
-you should remove @'\r'@ characters using @filter (/='\r')@.
-
--}
-
-module Text.Pandoc
- (
- -- * Definitions
- module Text.Pandoc.Definition
- -- * Generics
- , module Text.Pandoc.Generic
- -- * Options
- , module Text.Pandoc.Options
- -- * Logging
- , module Text.Pandoc.Logging
- -- * Typeclass
- , PandocMonad
- , runIO
- , runPure
- , runIOorExplode
- , setVerbosity
- -- * Error handling
- , module Text.Pandoc.Error
- -- * Lists of readers and writers
- , readers
- -- , writers
- , writers
- -- * Readers: converting /to/ Pandoc format
- , Reader (..)
- , readDocx
- , readOdt
- , readMarkdown
- , readCommonMark
- , readMediaWiki
- , readRST
- , readOrg
- , readLaTeX
- , readHtml
- , readTextile
- , readDocBook
- , readOPML
- , readHaddock
- , readNative
- , readJSON
- , readTWiki
- , readTxt2Tags
- , readEPUB
- -- * Writers: converting /from/ Pandoc format
- , Writer(..)
- , writeNative
- , writeJSON
- , writeMarkdown
- , writePlain
- , writeRST
- , writeLaTeX
- , writeBeamer
- , writeConTeXt
- , writeTexinfo
- , writeHtml4
- , writeHtml4String
- , writeHtml5
- , writeHtml5String
- , writeRevealJs
- , writeS5
- , writeSlidy
- , writeSlideous
- , writeDZSlides
- , writeICML
- , writeDocbook4
- , writeDocbook5
- , writeOPML
- , writeOpenDocument
- , writeMan
- , writeMediaWiki
- , writeDokuWiki
- , writeZimWiki
- , writeTextile
- , writeRTF
- , writeODT
- , writeDocx
- , writeEPUB2
- , writeEPUB3
- , writeFB2
- , writeOrg
- , writeAsciiDoc
- , writeHaddock
- , writeCommonMark
- , writeCustom
- , writeTEI
- -- * Rendering templates and default templates
- , module Text.Pandoc.Templates
- -- * Miscellaneous
- , getReader
- , getWriter
- , getDefaultExtensions
- , pandocVersion
- ) where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Generic
-import Text.Pandoc.Readers.Markdown
-import Text.Pandoc.Readers.CommonMark
-import Text.Pandoc.Readers.MediaWiki
-import Text.Pandoc.Readers.RST
-import Text.Pandoc.Readers.Org
-import Text.Pandoc.Readers.DocBook
-import Text.Pandoc.Readers.OPML
-import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.HTML
-import Text.Pandoc.Readers.Textile
-import Text.Pandoc.Readers.Native
-import Text.Pandoc.Readers.Haddock
-import Text.Pandoc.Readers.TWiki
-import Text.Pandoc.Readers.Docx
-import Text.Pandoc.Readers.Odt
-import Text.Pandoc.Readers.Txt2Tags
-import Text.Pandoc.Readers.EPUB
-import Text.Pandoc.Writers.Native
-import Text.Pandoc.Writers.Markdown
-import Text.Pandoc.Writers.RST
-import Text.Pandoc.Writers.LaTeX
-import Text.Pandoc.Writers.ConTeXt
-import Text.Pandoc.Writers.Texinfo
-import Text.Pandoc.Writers.HTML
-import Text.Pandoc.Writers.ODT
-import Text.Pandoc.Writers.Docx
-import Text.Pandoc.Writers.EPUB
-import Text.Pandoc.Writers.FB2
-import Text.Pandoc.Writers.ICML
-import Text.Pandoc.Writers.Docbook
-import Text.Pandoc.Writers.OPML
-import Text.Pandoc.Writers.OpenDocument
-import Text.Pandoc.Writers.Man
-import Text.Pandoc.Writers.RTF
-import Text.Pandoc.Writers.MediaWiki
-import Text.Pandoc.Writers.DokuWiki
-import Text.Pandoc.Writers.ZimWiki
-import Text.Pandoc.Writers.Textile
-import Text.Pandoc.Writers.Org
-import Text.Pandoc.Writers.AsciiDoc
-import Text.Pandoc.Writers.Haddock
-import Text.Pandoc.Writers.CommonMark
-import Text.Pandoc.Writers.Custom
-import Text.Pandoc.Writers.TEI
-import Text.Pandoc.Templates
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Shared (safeRead, mapLeft, pandocVersion)
-import Text.Pandoc.Error
-import Text.Pandoc.Class
-import Data.Aeson
-import qualified Data.ByteString.Lazy as BL
-import Data.List (intercalate)
-import Text.Parsec
-import Text.Parsec.Error
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad.Except (throwError)
-
-parseFormatSpec :: String
- -> Either ParseError (String, Extensions -> Extensions)
-parseFormatSpec = parse formatSpec ""
- where formatSpec = do
- name <- formatName
- extMods <- many extMod
- return (name, \x -> foldl (flip ($)) x extMods)
- formatName = many1 $ noneOf "-+"
- extMod = do
- polarity <- oneOf "-+"
- name <- many $ noneOf "-+"
- ext <- case safeRead ("Ext_" ++ name) of
- Just n -> return n
- Nothing
- | name == "lhs" -> return Ext_literate_haskell
- | otherwise -> fail $ "Unknown extension: " ++ name
- return $ case polarity of
- '-' -> disableExtension ext
- _ -> enableExtension ext
-
-data Reader m = StringReader (ReaderOptions -> String -> m Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
-
--- | Association list of formats and readers.
-readers :: PandocMonad m => [(String, Reader m)]
-readers = [ ("native" , StringReader readNative)
- ,("json" , StringReader $ \o s ->
- case readJSON o s of
- Right doc -> return doc
- Left _ -> throwError $ PandocParseError "JSON parse error")
- ,("markdown" , StringReader readMarkdown)
- ,("markdown_strict" , StringReader readMarkdown)
- ,("markdown_phpextra" , StringReader readMarkdown)
- ,("markdown_github" , StringReader readMarkdown)
- ,("markdown_mmd", StringReader readMarkdown)
- ,("commonmark" , StringReader readCommonMark)
- ,("rst" , StringReader readRST)
- ,("mediawiki" , StringReader readMediaWiki)
- ,("docbook" , StringReader readDocBook)
- ,("opml" , StringReader readOPML)
- ,("org" , StringReader readOrg)
- ,("textile" , StringReader readTextile) -- TODO : textile+lhs
- ,("html" , StringReader readHtml)
- ,("latex" , StringReader readLaTeX)
- ,("haddock" , StringReader readHaddock)
- ,("twiki" , StringReader readTWiki)
- ,("docx" , ByteStringReader readDocx)
- ,("odt" , ByteStringReader readOdt)
- ,("t2t" , StringReader readTxt2Tags)
- ,("epub" , ByteStringReader readEPUB)
- ]
-
-data Writer m = StringWriter (WriterOptions -> Pandoc -> m String)
- | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString)
-
--- | Association list of formats and writers.
-writers :: PandocMonad m => [ ( String, Writer m) ]
-writers = [
- ("native" , StringWriter writeNative)
- ,("json" , StringWriter $ \o d -> return $ writeJSON o d)
- ,("docx" , ByteStringWriter writeDocx)
- ,("odt" , ByteStringWriter writeODT)
- ,("epub" , ByteStringWriter writeEPUB3)
- ,("epub2" , ByteStringWriter writeEPUB2)
- ,("epub3" , ByteStringWriter writeEPUB3)
- ,("fb2" , StringWriter writeFB2)
- ,("html" , StringWriter writeHtml5String)
- ,("html4" , StringWriter writeHtml4String)
- ,("html5" , StringWriter writeHtml5String)
- ,("icml" , StringWriter writeICML)
- ,("s5" , StringWriter writeS5)
- ,("slidy" , StringWriter writeSlidy)
- ,("slideous" , StringWriter writeSlideous)
- ,("dzslides" , StringWriter writeDZSlides)
- ,("revealjs" , StringWriter writeRevealJs)
- ,("docbook" , StringWriter writeDocbook5)
- ,("docbook4" , StringWriter writeDocbook4)
- ,("docbook5" , StringWriter writeDocbook5)
- ,("opml" , StringWriter writeOPML)
- ,("opendocument" , StringWriter writeOpenDocument)
- ,("latex" , StringWriter writeLaTeX)
- ,("beamer" , StringWriter writeBeamer)
- ,("context" , StringWriter writeConTeXt)
- ,("texinfo" , StringWriter writeTexinfo)
- ,("man" , StringWriter writeMan)
- ,("markdown" , StringWriter writeMarkdown)
- ,("markdown_strict" , StringWriter writeMarkdown)
- ,("markdown_phpextra" , StringWriter writeMarkdown)
- ,("markdown_github" , StringWriter writeMarkdown)
- ,("markdown_mmd" , StringWriter writeMarkdown)
- ,("plain" , StringWriter writePlain)
- ,("rst" , StringWriter writeRST)
- ,("mediawiki" , StringWriter writeMediaWiki)
- ,("dokuwiki" , StringWriter writeDokuWiki)
- ,("zimwiki" , StringWriter writeZimWiki)
- ,("textile" , StringWriter writeTextile)
- ,("rtf" , StringWriter writeRTF)
- ,("org" , StringWriter writeOrg)
- ,("asciidoc" , StringWriter writeAsciiDoc)
- ,("haddock" , StringWriter writeHaddock)
- ,("commonmark" , StringWriter writeCommonMark)
- ,("tei" , StringWriter writeTEI)
- ]
-
-getDefaultExtensions :: String -> Extensions
-getDefaultExtensions "markdown_strict" = strictExtensions
-getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
-getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
-getDefaultExtensions "markdown_github" = githubMarkdownExtensions
-getDefaultExtensions "markdown" = pandocExtensions
-getDefaultExtensions "plain" = plainExtensions
-getDefaultExtensions "org" = extensionsFromList
- [Ext_citations,
- Ext_auto_identifiers]
-getDefaultExtensions "html" = extensionsFromList
- [Ext_auto_identifiers,
- Ext_native_divs,
- Ext_native_spans]
-getDefaultExtensions "html4" = getDefaultExtensions "html"
-getDefaultExtensions "html5" = getDefaultExtensions "html"
-getDefaultExtensions "epub" = extensionsFromList
- [Ext_raw_html,
- Ext_native_divs,
- Ext_native_spans,
- Ext_epub_html_exts]
-getDefaultExtensions "epub2" = getDefaultExtensions "epub"
-getDefaultExtensions "epub3" = getDefaultExtensions "epub"
-getDefaultExtensions "latex" = extensionsFromList
- [Ext_smart,
- Ext_auto_identifiers]
-getDefaultExtensions "context" = extensionsFromList
- [Ext_smart,
- Ext_auto_identifiers]
-getDefaultExtensions "textile" = extensionsFromList
- [Ext_old_dashes,
- Ext_smart,
- Ext_raw_html,
- Ext_auto_identifiers]
-getDefaultExtensions _ = extensionsFromList
- [Ext_auto_identifiers]
-
--- | Retrieve reader based on formatSpec (format+extensions).
-getReader :: PandocMonad m => String -> Either String (Reader m)
-getReader s =
- case parseFormatSpec s of
- Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
- Right (readerName, setExts) ->
- case lookup readerName readers of
- Nothing -> Left $ "Unknown reader: " ++ readerName
- Just (StringReader r) -> Right $ StringReader $ \o ->
- r o{ readerExtensions = setExts $
- getDefaultExtensions readerName }
- Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
- r o{ readerExtensions = setExts $
- getDefaultExtensions readerName }
-
-getWriter :: PandocMonad m => String -> Either String (Writer m)
-getWriter s
- = case parseFormatSpec s of
- Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
- Right (writerName, setExts) ->
- case lookup writerName writers of
- Nothing -> Left $ "Unknown writer: " ++ writerName
- Just (StringWriter r) -> Right $ StringWriter $
- \o -> r o{ writerExtensions = setExts $
- getDefaultExtensions writerName }
- Just (ByteStringWriter r) -> Right $ ByteStringWriter $
- \o -> r o{ writerExtensions = setExts $
- getDefaultExtensions writerName }
-
-readJSON :: ReaderOptions -> String -> Either PandocError Pandoc
-readJSON _ = mapLeft PandocParseError . eitherDecode' . UTF8.fromStringLazy
-
-writeJSON :: WriterOptions -> Pandoc -> String
-writeJSON _ = UTF8.toStringLazy . encode
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
deleted file mode 100644
index be8f26811..000000000
--- a/src/Text/Pandoc/App.hs
+++ /dev/null
@@ -1,1444 +0,0 @@
-{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-}
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.App
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley@edu>
- Stability : alpha
- Portability : portable
-
-Does a pandoc conversion based on command-line options.
--}
-module Text.Pandoc.App (
- convertWithOpts
- , Opt(..)
- , defaultOpts
- , parseOptions
- , options
- ) where
-import Text.Pandoc
-import Text.Pandoc.Builder (setMeta)
-import Text.Pandoc.PDF (makePDF)
-import Text.Pandoc.Walk (walk)
-import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8,
- headerShift, err, openURL, safeRead,
- readDataFile )
-import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
-import Text.Pandoc.XML ( toEntities )
-import Text.Pandoc.Highlighting (highlightingStyles)
-import Text.Pandoc.SelfContained ( makeSelfContained )
-import Text.Pandoc.Process (pipeProcess)
-import Skylighting ( Style, defaultSyntaxMap, Syntax(..) )
-import Text.Printf
-import System.Environment ( getEnvironment, getProgName, getArgs )
-import Control.Applicative ((<|>))
-import System.Exit ( ExitCode (..), exitSuccess )
-import System.FilePath
-import Data.Char ( toLower, toUpper )
-import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )
-import System.Directory ( getAppUserDataDirectory, findExecutable,
- doesFileExist, Permissions(..), getPermissions )
-import System.IO ( stdout, stderr )
-import System.IO.Error ( isDoesNotExistError )
-import qualified Control.Exception as E
-import Control.Exception.Extensible ( throwIO )
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad
-import Control.Monad.Trans
-import Data.Maybe (fromMaybe, isNothing, isJust)
-import Data.Foldable (foldrM)
-import Network.URI (parseURI, isURI, URI(..))
-import qualified Data.ByteString.Lazy as B
-import qualified Data.ByteString as BS
-import qualified Data.Map as M
-import Data.Aeson (eitherDecode', encode)
-import Data.Yaml (decode)
-import qualified Data.Yaml as Yaml
-import qualified Data.Text as T
-import System.Console.GetOpt
-import Text.Pandoc.Class (withMediaBag, PandocIO, getLog)
-import Paths_pandoc (getDataDir)
-#ifndef _WINDOWS
-import System.Posix.Terminal (queryTerminal)
-import System.Posix.IO (stdOutput)
-#endif
-
-parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
-parseOptions options' defaults = do
- rawArgs <- map UTF8.decodeArg <$> getArgs
- prg <- getProgName
-
- let (actions, args, unrecognizedOpts, errors) =
- getOpt' Permute options' rawArgs
-
- let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts
-
- unless (null errors && null unknownOptionErrors) $
- err 2 $ concat errors ++ unlines unknownOptionErrors ++
- ("Try " ++ prg ++ " --help for more information.")
-
- -- thread option data structure through all supplied option actions
- opts <- foldl (>>=) (return defaults) actions
- return (opts{ optInputFiles = args })
-
-convertWithOpts :: Opt -> IO ()
-convertWithOpts opts = do
- let args = optInputFiles opts
- let outputFile = optOutputFile opts
- let filters = optFilters opts
- let verbosity = optVerbosity opts
-
- when (optDumpArgs opts) $
- do UTF8.hPutStrLn stdout outputFile
- mapM_ (UTF8.hPutStrLn stdout) args
- exitSuccess
-
- epubStylesheet <- case optEpubStylesheet opts of
- Nothing -> return Nothing
- Just fp -> Just <$> UTF8.readFile fp
-
- epubMetadata <- case optEpubMetadata opts of
- Nothing -> return Nothing
- Just fp -> Just <$> UTF8.readFile fp
-
- let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css"
- let mathMethod =
- case (optKaTeXJS opts, optKaTeXStylesheet opts) of
- (Nothing, _) -> optHTMLMathMethod opts
- (Just js, ss) -> KaTeX js (fromMaybe csscdn ss)
-
-
- -- --bibliography implies -F pandoc-citeproc for backwards compatibility:
- let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) &&
- optCiteMethod opts `notElem` [Natbib, Biblatex] &&
- "pandoc-citeproc" `notElem` map takeBaseName filters
- let filters' = if needsCiteproc then "pandoc-citeproc" : filters
- else filters
-
- let sources = case args of
- [] -> ["-"]
- xs | optIgnoreArgs opts -> ["-"]
- | otherwise -> xs
-
- datadir <- case optDataDir opts of
- Nothing -> E.catch
- (Just <$> getAppUserDataDirectory "pandoc")
- (\e -> let _ = (e :: E.SomeException)
- in return Nothing)
- Just _ -> return $ optDataDir opts
-
- -- assign reader and writer based on options and filenames
- let readerName = case optReader opts of
- Nothing -> defaultReaderName
- (if any isURI sources
- then "html"
- else "markdown") sources
- Just x -> map toLower x
-
- let writerName = case optWriter opts of
- Nothing -> defaultWriterName outputFile
- Just x -> map toLower x
- let format = takeWhile (`notElem` ['+','-'])
- $ takeFileName writerName -- in case path to lua script
-
- let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
-
- let laTeXOutput = format `elem` ["latex", "beamer"]
- let conTeXtOutput = format == "context"
- let html5Output = format == "html5" || format == "html"
-
- -- disabling the custom writer for now
- writer <- if ".lua" `isSuffixOf` format
- -- note: use non-lowercased version writerName
- then error "custom writers disabled for now"
- else case getWriter writerName of
- Left e -> err 9 $
- if format == "pdf"
- then e ++
- "\nTo create a pdf with pandoc, use " ++
- "the latex or beamer writer and specify\n" ++
- "an output file with .pdf extension " ++
- "(pandoc -t latex -o filename.pdf)."
- else e
- Right w -> return (w :: Writer PandocIO)
-
- -- TODO: we have to get the input and the output into the state for
- -- the sake of the text2tags reader.
- reader <- case getReader readerName of
- Right r -> return (r :: Reader PandocIO)
- Left e -> err 7 e'
- where e' = case readerName of
- "pdf" -> e ++
- "\nPandoc can convert to PDF, but not from PDF."
- "doc" -> e ++
- "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc."
- _ -> e
-
- let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
-
- templ <- case optTemplate opts of
- _ | not standalone -> return Nothing
- Nothing -> do
- deftemp <- getDefaultTemplate datadir format
- case deftemp of
- Left e -> throwIO e
- Right t -> return (Just t)
- Just tp -> do
- -- strip off extensions
- let tp' = case takeExtension tp of
- "" -> tp <.> format
- _ -> tp
- Just <$> E.catch (UTF8.readFile tp')
- (\e -> if isDoesNotExistError e
- then E.catch
- (readDataFileUTF8 datadir
- ("templates" </> tp'))
- (\e' -> let _ = (e' :: E.SomeException)
- in throwIO e')
- else throwIO e)
-
- let addStringAsVariable varname s vars = return $ (varname, s) : vars
-
- let addContentsAsVariable varname fp vars = do
- s <- UTF8.readFile fp
- return $ (varname, s) : vars
-
- -- note: this reverses the list constructed in option parsing,
- -- which in turn was reversed from the command-line order,
- -- so we end up with the correct order in the variable list:
- let withList _ [] vars = return vars
- withList f (x:xs) vars = f x vars >>= withList f xs
-
- variables <- return (optVariables opts)
- >>=
- withList (addContentsAsVariable "include-before")
- (optIncludeBeforeBody opts)
- >>=
- withList (addContentsAsVariable "include-after")
- (optIncludeAfterBody opts)
- >>=
- withList (addContentsAsVariable "header-includes")
- (optIncludeInHeader opts)
- >>=
- withList (addStringAsVariable "css") (optCss opts)
- >>=
- maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts)
- >>=
- maybe return (addStringAsVariable "epub-cover-image")
- (optEpubCoverImage opts)
- >>=
- (\vars -> case mathMethod of
- LaTeXMathML Nothing -> do
- s <- readDataFileUTF8 datadir "LaTeXMathML.js"
- return $ ("mathml-script", s) : vars
- _ -> return vars)
- >>=
- (\vars -> if format == "dzslides"
- then do
- dztempl <- readDataFileUTF8 datadir
- ("dzslides" </> "template.html")
- let dzline = "<!-- {{{{ dzslides core"
- let dzcore = unlines
- $ dropWhile (not . (dzline `isPrefixOf`))
- $ lines dztempl
- return $ ("dzslides-core", dzcore) : vars
- else return vars)
-
- let sourceURL = case sources of
- [] -> Nothing
- (x:_) -> case parseURI x of
- Just u
- | uriScheme u `elem` ["http:","https:"] ->
- Just $ show u{ uriQuery = "",
- uriFragment = "" }
- _ -> Nothing
-
- let readerOpts = def{ readerStandalone = standalone
- , readerColumns = optColumns opts
- , readerTabStop = optTabStop opts
- , readerIndentedCodeClasses = optIndentedCodeClasses opts
- , readerApplyMacros = not laTeXOutput
- , readerDefaultImageExtension =
- optDefaultImageExtension opts
- , readerTrackChanges = optTrackChanges opts
- }
-
- highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
-
- let writerOptions = def { writerTemplate = templ,
- writerVariables = variables,
- writerTabStop = optTabStop opts,
- writerTableOfContents = optTableOfContents opts,
- writerHTMLMathMethod = mathMethod,
- writerIncremental = optIncremental opts,
- writerCiteMethod = optCiteMethod opts,
- writerNumberSections = optNumberSections opts,
- writerNumberOffset = optNumberOffset opts,
- writerSectionDivs = optSectionDivs opts,
- writerReferenceLinks = optReferenceLinks opts,
- writerReferenceLocation = optReferenceLocation opts,
- writerDpi = optDpi opts,
- writerWrapText = optWrapText opts,
- writerColumns = optColumns opts,
- writerEmailObfuscation = optEmailObfuscation opts,
- writerIdentifierPrefix = optIdentifierPrefix opts,
- writerSourceURL = sourceURL,
- writerUserDataDir = datadir,
- writerHtmlQTags = optHtmlQTags opts,
- writerTopLevelDivision = optTopLevelDivision opts,
- writerListings = optListings opts,
- writerSlideLevel = optSlideLevel opts,
- writerHighlightStyle = highlightStyle,
- writerSetextHeaders = optSetextHeaders opts,
- writerEpubMetadata = epubMetadata,
- writerEpubStylesheet = epubStylesheet,
- writerEpubFonts = optEpubFonts opts,
- writerEpubChapterLevel = optEpubChapterLevel opts,
- writerTOCDepth = optTOCDepth opts,
- writerReferenceDoc = optReferenceDoc opts,
- writerLaTeXArgs = optLaTeXEngineArgs opts
- }
-
-
-#ifdef _WINDOWS
- let istty = True
-#else
- istty <- queryTerminal stdOutput
-#endif
- when (istty && not (isTextFormat format) && outputFile == "-") $
- err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++
- "Specify an output file using the -o option."
-
-
- let transforms = case optBaseHeaderLevel opts of
- x | x > 1 -> [headerShift (x - 1)]
- | otherwise -> []
-
- let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
- then 0
- else optTabStop opts)
-
- readSources :: (Functor m, MonadIO m) => [FilePath] -> m String
- readSources srcs = convertTabs . intercalate "\n" <$>
- mapM readSource srcs
-
- let runIO' :: PandocIO a -> IO a
- runIO' f = do
- (res, reports) <- runIOorExplode $ do
- setVerbosity verbosity
- x <- f
- rs <- getLog
- return (x, rs)
- case optLogFile opts of
- Nothing -> return ()
- Just logfile -> B.writeFile logfile (encodeLogMessages reports)
- let isWarning msg = messageVerbosity msg == WARNING
- when (optFailIfWarnings opts && any isWarning reports) $
- err 3 "Failing because there were warnings."
- return res
-
- let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag)
- sourceToDoc sources' =
- case reader of
- StringReader r
- | optFileScope opts || readerName == "json" -> do
- pairs <- mapM
- (readSource >=> withMediaBag . r readerOpts) sources
- return (mconcat (map fst pairs), mconcat (map snd pairs))
- | otherwise ->
- readSources sources' >>= withMediaBag . r readerOpts
- ByteStringReader r -> do
- pairs <- mapM (readFile' >=>
- withMediaBag . r readerOpts) sources
- return (mconcat (map fst pairs), mconcat (map snd pairs))
-
- runIO' $ do
- (doc, media) <- sourceToDoc sources
- doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=>
- return . flip (foldr addMetadata) (optMetadata opts) >=>
- applyTransforms transforms >=>
- applyFilters datadir filters' [format]) doc
-
- case writer of
- -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile
- ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile
- StringWriter f
- | pdfOutput -> do
- -- make sure writer is latex or beamer or context or html5
- unless (laTeXOutput || conTeXtOutput || html5Output) $
- err 47 $ "cannot produce pdf output with " ++ format ++
- " writer"
-
- let pdfprog = case () of
- _ | conTeXtOutput -> "context"
- _ | html5Output -> "wkhtmltopdf"
- _ -> optLaTeXEngine opts
- -- check for pdf creating program
- mbPdfProg <- liftIO $ findExecutable pdfprog
- when (isNothing mbPdfProg) $
- err 41 $ pdfprog ++ " not found. " ++
- pdfprog ++ " is needed for pdf output."
-
- res <- makePDF pdfprog f writerOptions verbosity media doc'
- case res of
- Right pdf -> writeFnBinary outputFile pdf
- Left err' -> liftIO $ do
- B.hPutStr stderr err'
- B.hPut stderr $ B.pack [10]
- err 43 "Error producing PDF"
- | otherwise -> do
- let htmlFormat = format `elem`
- ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
- selfcontain = if optSelfContained opts && htmlFormat
- then makeSelfContained writerOptions media
- else return
- handleEntities = if htmlFormat && optAscii opts
- then toEntities
- else id
- output <- f writerOptions doc'
- selfcontain (output ++ ['\n' | not standalone]) >>=
- writerFn outputFile . handleEntities
-
-type Transform = Pandoc -> Pandoc
-
-isTextFormat :: String -> Bool
-isTextFormat s = s `notElem` ["odt","docx","epub","epub3"]
-
-externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc
-externalFilter f args' d = liftIO $ do
- exists <- doesFileExist f
- isExecutable <- if exists
- then executable <$> getPermissions f
- else return True
- let (f', args'') = if exists
- then case map toLower (takeExtension f) of
- _ | isExecutable -> ("." </> f, args')
- ".py" -> ("python", f:args')
- ".hs" -> ("runhaskell", f:args')
- ".pl" -> ("perl", f:args')
- ".rb" -> ("ruby", f:args')
- ".php" -> ("php", f:args')
- ".js" -> ("node", f:args')
- _ -> (f, args')
- else (f, args')
- unless (exists && isExecutable) $ do
- mbExe <- findExecutable f'
- when (isNothing mbExe) $
- err 83 $ "Error running filter " ++ f ++ ":\n" ++
- "Could not find executable '" ++ f' ++ "'."
- env <- getEnvironment
- let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env
- (exitcode, outbs) <- E.handle filterException $
- pipeProcess env' f' args'' $ encode d
- case exitcode of
- ExitSuccess -> return $ either error id $ eitherDecode' outbs
- ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++
- "Filter returned error status " ++ show ec
- where filterException :: E.SomeException -> IO a
- filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++
- show e
-
--- | Data structure for command line options.
-data Opt = Opt
- { optTabStop :: Int -- ^ Number of spaces per tab
- , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces
- , optStandalone :: Bool -- ^ Include header, footer
- , optReader :: Maybe String -- ^ Reader format
- , optWriter :: Maybe String -- ^ Writer format
- , optTableOfContents :: Bool -- ^ Include table of contents
- , optBaseHeaderLevel :: Int -- ^ Base header level
- , optTemplate :: Maybe FilePath -- ^ Custom template
- , optVariables :: [(String,String)] -- ^ Template variables to set
- , optMetadata :: [(String, String)] -- ^ Metadata fields to set
- , optOutputFile :: FilePath -- ^ Name of output file
- , optInputFiles :: [FilePath] -- ^ Names of input files
- , optNumberSections :: Bool -- ^ Number sections in LaTeX
- , optNumberOffset :: [Int] -- ^ Starting number for sections
- , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
- , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5
- , optSelfContained :: Bool -- ^ Make HTML accessible offline
- , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML
- , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code
- , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions
- , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
- , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
- , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet
- , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
- , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
- , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters
- , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub
- , optTOCDepth :: Int -- ^ Number of levels to include in TOC
- , optDumpArgs :: Bool -- ^ Output command-line arguments
- , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
- , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output
- , optLogFile :: Maybe FilePath -- ^ File to write JSON log output
- , optFailIfWarnings :: Bool -- ^ Fail on warnings
- , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output
- , optDpi :: Int -- ^ Dpi
- , optWrapText :: WrapOption -- ^ Options for wrapping text
- , optColumns :: Int -- ^ Line length in characters
- , optFilters :: [FilePath] -- ^ Filters to apply
- , optEmailObfuscation :: ObfuscationMethod
- , optIdentifierPrefix :: String
- , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
- , optDataDir :: Maybe FilePath
- , optCiteMethod :: CiteMethod -- ^ Method to output cites
- , optListings :: Bool -- ^ Use listings package for code blocks
- , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
- , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine
- , optSlideLevel :: Maybe Int -- ^ Header level that creates slides
- , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
- , optAscii :: Bool -- ^ Use ascii characters only in html
- , optDefaultImageExtension :: String -- ^ Default image extension
- , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
- , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
- , optFileScope :: Bool -- ^ Parse input files before combining
- , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX
- , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX
- , optTitlePrefix :: Maybe String -- ^ Prefix for title
- , optCss :: [FilePath] -- ^ CSS files to link to
- , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before
- , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
- , optIncludeInHeader :: [FilePath] -- ^ Files to include in header
- }
-
--- | Defaults for command-line options.
-defaultOpts :: Opt
-defaultOpts = Opt
- { optTabStop = 4
- , optPreserveTabs = False
- , optStandalone = False
- , optReader = Nothing
- , optWriter = Nothing
- , optTableOfContents = False
- , optBaseHeaderLevel = 1
- , optTemplate = Nothing
- , optVariables = []
- , optMetadata = []
- , optOutputFile = "-" -- "-" means stdout
- , optInputFiles = []
- , optNumberSections = False
- , optNumberOffset = [0,0,0,0,0,0]
- , optSectionDivs = False
- , optIncremental = False
- , optSelfContained = False
- , optHtmlQTags = False
- , optHighlightStyle = Just "pygments"
- , optTopLevelDivision = TopLevelDefault
- , optHTMLMathMethod = PlainMath
- , optReferenceDoc = Nothing
- , optEpubStylesheet = Nothing
- , optEpubMetadata = Nothing
- , optEpubFonts = []
- , optEpubChapterLevel = 1
- , optEpubCoverImage = Nothing
- , optTOCDepth = 3
- , optDumpArgs = False
- , optIgnoreArgs = False
- , optVerbosity = WARNING
- , optLogFile = Nothing
- , optFailIfWarnings = False
- , optReferenceLinks = False
- , optReferenceLocation = EndOfDocument
- , optDpi = 96
- , optWrapText = WrapAuto
- , optColumns = 72
- , optFilters = []
- , optEmailObfuscation = NoObfuscation
- , optIdentifierPrefix = ""
- , optIndentedCodeClasses = []
- , optDataDir = Nothing
- , optCiteMethod = Citeproc
- , optListings = False
- , optLaTeXEngine = "pdflatex"
- , optLaTeXEngineArgs = []
- , optSlideLevel = Nothing
- , optSetextHeaders = True
- , optAscii = False
- , optDefaultImageExtension = ""
- , optExtractMedia = Nothing
- , optTrackChanges = AcceptChanges
- , optFileScope = False
- , optKaTeXStylesheet = Nothing
- , optKaTeXJS = Nothing
- , optTitlePrefix = Nothing
- , optCss = []
- , optIncludeBeforeBody = []
- , optIncludeAfterBody = []
- , optIncludeInHeader = []
- }
-
-addMetadata :: (String, String) -> Pandoc -> Pandoc
-addMetadata (k, v) (Pandoc meta bs) = Pandoc meta' bs
- where meta' = case lookupMeta k meta of
- Nothing -> setMeta k v' meta
- Just (MetaList xs) ->
- setMeta k (MetaList (xs ++ [v'])) meta
- Just x -> setMeta k (MetaList [x, v']) meta
- v' = readMetaValue v
-
-readMetaValue :: String -> MetaValue
-readMetaValue s = case decode (UTF8.fromString s) of
- Just (Yaml.String t) -> MetaString $ T.unpack t
- Just (Yaml.Bool b) -> MetaBool b
- _ -> MetaString s
-
--- Determine default reader based on source file extensions
-defaultReaderName :: String -> [FilePath] -> String
-defaultReaderName fallback [] = fallback
-defaultReaderName fallback (x:xs) =
- case takeExtension (map toLower x) of
- ".xhtml" -> "html"
- ".html" -> "html"
- ".htm" -> "html"
- ".md" -> "markdown"
- ".markdown" -> "markdown"
- ".tex" -> "latex"
- ".latex" -> "latex"
- ".ltx" -> "latex"
- ".rst" -> "rst"
- ".org" -> "org"
- ".lhs" -> "markdown+lhs"
- ".db" -> "docbook"
- ".opml" -> "opml"
- ".wiki" -> "mediawiki"
- ".dokuwiki" -> "dokuwiki"
- ".textile" -> "textile"
- ".native" -> "native"
- ".json" -> "json"
- ".docx" -> "docx"
- ".t2t" -> "t2t"
- ".epub" -> "epub"
- ".odt" -> "odt"
- ".pdf" -> "pdf" -- so we get an "unknown reader" error
- ".doc" -> "doc" -- so we get an "unknown reader" error
- _ -> defaultReaderName fallback xs
-
--- Determine default writer based on output file extension
-defaultWriterName :: FilePath -> String
-defaultWriterName "-" = "html" -- no output file
-defaultWriterName x =
- case takeExtension (map toLower x) of
- "" -> "markdown" -- empty extension
- ".tex" -> "latex"
- ".latex" -> "latex"
- ".ltx" -> "latex"
- ".context" -> "context"
- ".ctx" -> "context"
- ".rtf" -> "rtf"
- ".rst" -> "rst"
- ".s5" -> "s5"
- ".native" -> "native"
- ".json" -> "json"
- ".txt" -> "markdown"
- ".text" -> "markdown"
- ".md" -> "markdown"
- ".markdown" -> "markdown"
- ".textile" -> "textile"
- ".lhs" -> "markdown+lhs"
- ".texi" -> "texinfo"
- ".texinfo" -> "texinfo"
- ".db" -> "docbook"
- ".odt" -> "odt"
- ".docx" -> "docx"
- ".epub" -> "epub"
- ".org" -> "org"
- ".asciidoc" -> "asciidoc"
- ".adoc" -> "asciidoc"
- ".pdf" -> "latex"
- ".fb2" -> "fb2"
- ".opml" -> "opml"
- ".icml" -> "icml"
- ".tei.xml" -> "tei"
- ".tei" -> "tei"
- ['.',y] | y `elem` ['1'..'9'] -> "man"
- _ -> "html"
-
--- Transformations of a Pandoc document post-parsing:
-
-extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc
-extractMedia media dir d =
- case [fp | (fp, _, _) <- mediaDirectory media] of
- [] -> return d
- fps -> do
- extractMediaBag True dir media
- return $ walk (adjustImagePath dir fps) d
-
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
- | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
-adjustImagePath _ _ x = x
-
-applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
-applyTransforms transforms d = return $ foldr ($) d transforms
-
- -- First we check to see if a filter is found. If not, and if it's
- -- not an absolute path, we check to see whether it's in `userdir/filters`.
- -- If not, we leave it unchanged.
-expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath
-expandFilterPath mbDatadir fp = liftIO $ do
- fpExists <- doesFileExist fp
- if fpExists
- then return fp
- else case mbDatadir of
- Just datadir | isRelative fp -> do
- let filterPath = (datadir </> "filters" </> fp)
- filterPathExists <- doesFileExist filterPath
- if filterPathExists
- then return filterPath
- else return fp
- _ -> return fp
-
-applyFilters :: MonadIO m
- => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
-applyFilters mbDatadir filters args d = do
- expandedFilters <- mapM (expandFilterPath mbDatadir) filters
- foldrM ($) d $ map (flip externalFilter args) expandedFilters
-
-readSource :: MonadIO m => FilePath -> m String
-readSource "-" = liftIO UTF8.getContents
-readSource src = case parseURI src of
- Just u | uriScheme u `elem` ["http:","https:"] ->
- readURI src
- | uriScheme u == "file:" ->
- liftIO $ UTF8.readFile (uriPath u)
- _ -> liftIO $ UTF8.readFile src
-
-readURI :: MonadIO m => FilePath -> m String
-readURI src = do
- res <- liftIO $ openURL src
- case res of
- Left e -> liftIO $ throwIO e
- Right (bs,_) -> return $ UTF8.toString bs
-
-readFile' :: MonadIO m => FilePath -> m B.ByteString
-readFile' "-" = liftIO $ B.getContents
-readFile' f = liftIO $ B.readFile f
-
-writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m ()
-writeFnBinary "-" = liftIO . B.putStr
-writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f)
-
-writerFn :: MonadIO m => FilePath -> String -> m ()
-writerFn "-" = liftIO . UTF8.putStr
-writerFn f = liftIO . UTF8.writeFile f
-
-lookupHighlightStyle :: Maybe String -> IO (Maybe Style)
-lookupHighlightStyle Nothing = return Nothing
-lookupHighlightStyle (Just s) =
- case lookup (map toLower s) highlightingStyles of
- Just sty -> return (Just sty)
- Nothing -> err 68 $ "Unknown highlight-style " ++ s
-
--- | A list of functions, each transforming the options data structure
--- in response to a command-line option.
-options :: [OptDescr (Opt -> IO Opt)]
-options =
- [ Option "fr" ["from","read"]
- (ReqArg
- (\arg opt -> return opt { optReader = Just arg })
- "FORMAT")
- ""
-
- , Option "tw" ["to","write"]
- (ReqArg
- (\arg opt -> return opt { optWriter = Just arg })
- "FORMAT")
- ""
-
- , Option "o" ["output"]
- (ReqArg
- (\arg opt -> return opt { optOutputFile = arg })
- "FILENAME")
- "" -- "Name of output file"
-
- , Option "" ["data-dir"]
- (ReqArg
- (\arg opt -> return opt { optDataDir = Just arg })
- "DIRECTORY") -- "Directory containing pandoc data files."
- ""
-
- , Option "" ["base-header-level"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t > 0 && t < 6 -> do
- return opt{ optBaseHeaderLevel = t }
- _ -> err 19
- "base-header-level must be 1-5")
- "NUMBER")
- "" -- "Headers base level"
-
- , Option "" ["indented-code-classes"]
- (ReqArg
- (\arg opt -> return opt { optIndentedCodeClasses = words $
- map (\c -> if c == ',' then ' ' else c) arg })
- "STRING")
- "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
-
- , Option "F" ["filter"]
- (ReqArg
- (\arg opt -> return opt { optFilters = arg : optFilters opt })
- "PROGRAM")
- "" -- "External JSON filter"
-
- , Option "p" ["preserve-tabs"]
- (NoArg
- (\opt -> return opt { optPreserveTabs = True }))
- "" -- "Preserve tabs instead of converting to spaces"
-
- , Option "" ["tab-stop"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t > 0 -> return opt { optTabStop = t }
- _ -> err 31
- "tab-stop must be a number greater than 0")
- "NUMBER")
- "" -- "Tab stop (default 4)"
-
- , Option "" ["track-changes"]
- (ReqArg
- (\arg opt -> do
- action <- case arg of
- "accept" -> return AcceptChanges
- "reject" -> return RejectChanges
- "all" -> return AllChanges
- _ -> err 6
- ("Unknown option for track-changes: " ++ arg)
- return opt { optTrackChanges = action })
- "accept|reject|all")
- "" -- "Accepting or reject MS Word track-changes.""
-
- , Option "" ["file-scope"]
- (NoArg
- (\opt -> return opt { optFileScope = True }))
- "" -- "Parse input files before combining"
-
- , Option "" ["extract-media"]
- (ReqArg
- (\arg opt ->
- return opt { optExtractMedia = Just arg })
- "PATH")
- "" -- "Directory to which to extract embedded media"
-
- , Option "s" ["standalone"]
- (NoArg
- (\opt -> return opt { optStandalone = True }))
- "" -- "Include needed header and footer on output"
-
- , Option "" ["template"]
- (ReqArg
- (\arg opt ->
- return opt{ optTemplate = Just arg,
- optStandalone = True })
- "FILENAME")
- "" -- "Use custom template"
-
- , Option "M" ["metadata"]
- (ReqArg
- (\arg opt -> do
- let (key, val) = splitField arg
- return opt{ optMetadata = (key, val) : optMetadata opt })
- "KEY[:VALUE]")
- ""
-
- , Option "V" ["variable"]
- (ReqArg
- (\arg opt -> do
- let (key, val) = splitField arg
- return opt{ optVariables = (key, val) : optVariables opt })
- "KEY[:VALUE]")
- ""
-
- , Option "D" ["print-default-template"]
- (ReqArg
- (\arg _ -> do
- templ <- getDefaultTemplate Nothing arg
- case templ of
- Right t -> UTF8.hPutStr stdout t
- Left e -> error $ show e
- exitSuccess)
- "FORMAT")
- "" -- "Print default template for FORMAT"
-
- , Option "" ["print-default-data-file"]
- (ReqArg
- (\arg _ -> do
- readDataFile Nothing arg >>= BS.hPutStr stdout
- exitSuccess)
- "FILE")
- "" -- "Print default data file"
-
- , Option "" ["dpi"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t > 0 -> return opt { optDpi = t }
- _ -> err 31
- "dpi must be a number greater than 0")
- "NUMBER")
- "" -- "Dpi (default 96)"
-
- , Option "" ["wrap"]
- (ReqArg
- (\arg opt ->
- case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of
- Just o -> return opt { optWrapText = o }
- Nothing -> err 77 "--wrap must be auto, none, or preserve")
- "auto|none|preserve")
- "" -- "Option for wrapping text in output"
-
- , Option "" ["columns"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t > 0 -> return opt { optColumns = t }
- _ -> err 33
- "columns must be a number greater than 0")
- "NUMBER")
- "" -- "Length of line in characters"
-
- , Option "" ["toc", "table-of-contents"]
- (NoArg
- (\opt -> return opt { optTableOfContents = True }))
- "" -- "Include table of contents"
-
- , Option "" ["toc-depth"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t >= 1 && t <= 6 ->
- return opt { optTOCDepth = t }
- _ -> err 57
- "TOC level must be a number between 1 and 6")
- "NUMBER")
- "" -- "Number of levels to include in TOC"
-
- , Option "" ["no-highlight"]
- (NoArg
- (\opt -> return opt { optHighlightStyle = Nothing }))
- "" -- "Don't highlight source code"
-
- , Option "" ["highlight-style"]
- (ReqArg
- (\arg opt -> return opt{ optHighlightStyle = Just arg })
- "STYLE")
- "" -- "Style for highlighted code"
-
- , Option "H" ["include-in-header"]
- (ReqArg
- (\arg opt -> return opt{ optIncludeInHeader =
- arg : optIncludeInHeader opt,
- optStandalone = True })
- "FILENAME")
- "" -- "File to include at end of header (implies -s)"
-
- , Option "B" ["include-before-body"]
- (ReqArg
- (\arg opt -> return opt{ optIncludeBeforeBody =
- arg : optIncludeBeforeBody opt,
- optStandalone = True })
- "FILENAME")
- "" -- "File to include before document body"
-
- , Option "A" ["include-after-body"]
- (ReqArg
- (\arg opt -> return opt{ optIncludeAfterBody =
- arg : optIncludeAfterBody opt,
- optStandalone = True })
- "FILENAME")
- "" -- "File to include after document body"
-
- , Option "" ["self-contained"]
- (NoArg
- (\opt -> return opt { optSelfContained = True,
- optStandalone = True }))
- "" -- "Make slide shows include all the needed js and css"
-
- , Option "" ["html-q-tags"]
- (NoArg
- (\opt ->
- return opt { optHtmlQTags = True }))
- "" -- "Use <q> tags for quotes in HTML"
-
- , Option "" ["ascii"]
- (NoArg
- (\opt -> return opt { optAscii = True }))
- "" -- "Use ascii characters only in HTML output"
-
- , Option "" ["reference-links"]
- (NoArg
- (\opt -> return opt { optReferenceLinks = True } ))
- "" -- "Use reference links in parsing HTML"
-
- , Option "" ["reference-location"]
- (ReqArg
- (\arg opt -> do
- action <- case arg of
- "block" -> return EndOfBlock
- "section" -> return EndOfSection
- "document" -> return EndOfDocument
- _ -> err 6
- ("Unknown option for reference-location: " ++ arg)
- return opt { optReferenceLocation = action })
- "block|section|document")
- "" -- "Accepting or reject MS Word track-changes.""
-
- , Option "" ["atx-headers"]
- (NoArg
- (\opt -> return opt { optSetextHeaders = False } ))
- "" -- "Use atx-style headers for markdown"
-
- , Option "" ["top-level-division"]
- (ReqArg
- (\arg opt -> do
- let tldName = "TopLevel" ++ uppercaseFirstLetter arg
- case safeRead tldName of
- Just tlDiv -> return opt { optTopLevelDivision = tlDiv }
- _ -> err 76 ("Top-level division must be " ++
- "section, chapter, part, or default"))
- "section|chapter|part")
- "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook"
-
- , Option "N" ["number-sections"]
- (NoArg
- (\opt -> return opt { optNumberSections = True }))
- "" -- "Number sections in LaTeX"
-
- , Option "" ["number-offset"]
- (ReqArg
- (\arg opt ->
- case safeRead ('[':arg ++ "]") of
- Just ns -> return opt { optNumberOffset = ns,
- optNumberSections = True }
- _ -> err 57 "could not parse number-offset")
- "NUMBERS")
- "" -- "Starting number for sections, subsections, etc."
-
- , Option "" ["listings"]
- (NoArg
- (\opt -> return opt { optListings = True }))
- "" -- "Use listings package for LaTeX code blocks"
-
- , Option "i" ["incremental"]
- (NoArg
- (\opt -> return opt { optIncremental = True }))
- "" -- "Make list items display incrementally in Slidy/Slideous/S5"
-
- , Option "" ["slide-level"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t >= 1 && t <= 6 ->
- return opt { optSlideLevel = Just t }
- _ -> err 39
- "slide level must be a number between 1 and 6")
- "NUMBER")
- "" -- "Force header level for slides"
-
- , Option "" ["section-divs"]
- (NoArg
- (\opt -> return opt { optSectionDivs = True }))
- "" -- "Put sections in div tags in HTML"
-
- , Option "" ["default-image-extension"]
- (ReqArg
- (\arg opt -> return opt { optDefaultImageExtension = arg })
- "extension")
- "" -- "Default extension for extensionless images"
-
- , Option "" ["email-obfuscation"]
- (ReqArg
- (\arg opt -> do
- method <- case arg of
- "references" -> return ReferenceObfuscation
- "javascript" -> return JavascriptObfuscation
- "none" -> return NoObfuscation
- _ -> err 6
- ("Unknown obfuscation method: " ++ arg)
- return opt { optEmailObfuscation = method })
- "none|javascript|references")
- "" -- "Method for obfuscating email in HTML"
-
- , Option "" ["id-prefix"]
- (ReqArg
- (\arg opt -> return opt { optIdentifierPrefix = arg })
- "STRING")
- "" -- "Prefix to add to automatically generated HTML identifiers"
-
- , Option "T" ["title-prefix"]
- (ReqArg
- (\arg opt -> do
- let newvars = ("title-prefix", arg) : optVariables opt
- return opt { optVariables = newvars,
- optStandalone = True })
- "STRING")
- "" -- "String to prefix to HTML window title"
-
- , Option "c" ["css"]
- (ReqArg
- (\arg opt -> return opt{ optCss = arg : optCss opt })
- -- add new link to end, so it is included in proper order
- "URL")
- "" -- "Link to CSS style sheet"
-
- , Option "" ["reference-doc"]
- (ReqArg
- (\arg opt ->
- return opt { optReferenceDoc = Just arg })
- "FILENAME")
- "" -- "Path of custom reference doc"
-
- , Option "" ["epub-stylesheet"]
- (ReqArg
- (\arg opt -> return opt { optEpubStylesheet = Just arg })
- "FILENAME")
- "" -- "Path of epub.css"
-
- , Option "" ["epub-cover-image"]
- (ReqArg
- (\arg opt ->
- return opt { optVariables =
- ("epub-cover-image", arg) : optVariables opt })
- "FILENAME")
- "" -- "Path of epub cover image"
-
- , Option "" ["epub-metadata"]
- (ReqArg
- (\arg opt -> return opt { optEpubMetadata = Just arg })
- "FILENAME")
- "" -- "Path of epub metadata file"
-
- , Option "" ["epub-embed-font"]
- (ReqArg
- (\arg opt ->
- return opt{ optEpubFonts = arg : optEpubFonts opt })
- "FILE")
- "" -- "Directory of fonts to embed"
-
- , Option "" ["epub-chapter-level"]
- (ReqArg
- (\arg opt ->
- case safeRead arg of
- Just t | t >= 1 && t <= 6 ->
- return opt { optEpubChapterLevel = t }
- _ -> err 59
- "chapter level must be a number between 1 and 6")
- "NUMBER")
- "" -- "Header level at which to split chapters in EPUB"
-
- , Option "" ["latex-engine"]
- (ReqArg
- (\arg opt -> do
- let b = takeBaseName arg
- if b `elem` ["pdflatex", "lualatex", "xelatex"]
- then return opt { optLaTeXEngine = arg }
- else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.")
- "PROGRAM")
- "" -- "Name of latex program to use in generating PDF"
-
- , Option "" ["latex-engine-opt"]
- (ReqArg
- (\arg opt -> do
- let oldArgs = optLaTeXEngineArgs opt
- return opt { optLaTeXEngineArgs = arg : oldArgs })
- "STRING")
- "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used"
-
- , Option "" ["bibliography"]
- (ReqArg
- (\arg opt -> return opt{ optMetadata =
- ("bibliography", arg) : optMetadata opt })
- "FILE")
- ""
-
- , Option "" ["csl"]
- (ReqArg
- (\arg opt ->
- return opt{ optMetadata =
- ("csl", arg) : optMetadata opt })
- "FILE")
- ""
-
- , Option "" ["citation-abbreviations"]
- (ReqArg
- (\arg opt ->
- return opt{ optMetadata =
- ("citation-abbreviations", arg): optMetadata opt })
- "FILE")
- ""
-
- , Option "" ["natbib"]
- (NoArg
- (\opt -> return opt { optCiteMethod = Natbib }))
- "" -- "Use natbib cite commands in LaTeX output"
-
- , Option "" ["biblatex"]
- (NoArg
- (\opt -> return opt { optCiteMethod = Biblatex }))
- "" -- "Use biblatex cite commands in LaTeX output"
-
- , Option "m" ["latexmathml", "asciimathml"]
- (OptArg
- (\arg opt ->
- return opt { optHTMLMathMethod = LaTeXMathML arg })
- "URL")
- "" -- "Use LaTeXMathML script in html output"
-
- , Option "" ["mathml"]
- (NoArg
- (\opt ->
- return opt { optHTMLMathMethod = MathML }))
- "" -- "Use mathml for HTML math"
-
- , Option "" ["mimetex"]
- (OptArg
- (\arg opt -> do
- let url' = case arg of
- Just u -> u ++ "?"
- Nothing -> "/cgi-bin/mimetex.cgi?"
- return opt { optHTMLMathMethod = WebTeX url' })
- "URL")
- "" -- "Use mimetex for HTML math"
-
- , Option "" ["webtex"]
- (OptArg
- (\arg opt -> do
- let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg
- return opt { optHTMLMathMethod = WebTeX url' })
- "URL")
- "" -- "Use web service for HTML math"
-
- , Option "" ["jsmath"]
- (OptArg
- (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
- "URL")
- "" -- "Use jsMath for HTML math"
-
- , Option "" ["mathjax"]
- (OptArg
- (\arg opt -> do
- let url' = fromMaybe "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML-full" arg
- return opt { optHTMLMathMethod = MathJax url'})
- "URL")
- "" -- "Use MathJax for HTML math"
- , Option "" ["katex"]
- (OptArg
- (\arg opt ->
- return opt
- { optKaTeXJS =
- arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"})
- "URL")
- "" -- Use KaTeX for HTML Math
-
- , Option "" ["katex-stylesheet"]
- (ReqArg
- (\arg opt ->
- return opt { optKaTeXStylesheet = Just arg })
- "URL")
- "" -- Set the KaTeX Stylesheet location
-
- , Option "" ["gladtex"]
- (NoArg
- (\opt -> return opt { optHTMLMathMethod = GladTeX }))
- "" -- "Use gladtex for HTML math"
-
- , Option "" ["trace"]
- (NoArg
- (\opt -> return opt { optVerbosity = DEBUG }))
- "" -- "Turn on diagnostic tracing in readers."
-
- , Option "" ["dump-args"]
- (NoArg
- (\opt -> return opt { optDumpArgs = True }))
- "" -- "Print output filename and arguments to stdout."
-
- , Option "" ["ignore-args"]
- (NoArg
- (\opt -> return opt { optIgnoreArgs = True }))
- "" -- "Ignore command-line arguments."
-
- , Option "" ["verbose"]
- (NoArg
- (\opt -> return opt { optVerbosity = INFO }))
- "" -- "Verbose diagnostic output."
-
- , Option "" ["quiet"]
- (NoArg
- (\opt -> return opt { optVerbosity = ERROR }))
- "" -- "Suppress warnings."
-
- , Option "" ["fail-if-warnings"]
- (NoArg
- (\opt -> return opt { optFailIfWarnings = True }))
- "" -- "Exit with error status if there were warnings."
-
- , Option "" ["log"]
- (ReqArg
- (\arg opt -> return opt{ optLogFile = Just arg })
- "FILE")
- "" -- "Log messages in JSON format to this file."
-
- , Option "" ["bash-completion"]
- (NoArg
- (\_ -> do
- ddir <- getDataDir
- tpl <- readDataFileUTF8 Nothing "bash_completion.tpl"
- let optnames (Option shorts longs _ _) =
- map (\c -> ['-',c]) shorts ++
- map ("--" ++) longs
- let allopts = unwords (concatMap optnames options)
- UTF8.hPutStrLn stdout $ printf tpl allopts
- (unwords readers'names)
- (unwords writers'names)
- (unwords $ map fst highlightingStyles)
- ddir
- exitSuccess ))
- "" -- "Print bash completion script"
-
- , Option "" ["list-input-formats"]
- (NoArg
- (\_ -> do
- mapM_ (UTF8.hPutStrLn stdout) readers'names
- exitSuccess ))
- ""
-
- , Option "" ["list-output-formats"]
- (NoArg
- (\_ -> do
- mapM_ (UTF8.hPutStrLn stdout) writers'names
- exitSuccess ))
- ""
-
- , Option "" ["list-extensions"]
- (NoArg
- (\_ -> do
- let showExt x = drop 4 (show x) ++
- if extensionEnabled x pandocExtensions
- then " +"
- else " -"
- mapM_ (UTF8.hPutStrLn stdout . showExt)
- ([minBound..maxBound] :: [Extension])
- exitSuccess ))
- ""
-
- , Option "" ["list-highlight-languages"]
- (NoArg
- (\_ -> do
- let langs = [ T.unpack (T.toLower (sShortname s))
- | s <- M.elems defaultSyntaxMap
- , sShortname s `notElem`
- [T.pack "Alert", T.pack "Alert_indent"]
- ]
- mapM_ (UTF8.hPutStrLn stdout) langs
- exitSuccess ))
- ""
-
- , Option "" ["list-highlight-styles"]
- (NoArg
- (\_ -> do
- mapM_ (UTF8.hPutStrLn stdout) $
- map fst highlightingStyles
- exitSuccess ))
- ""
-
- , Option "v" ["version"]
- (NoArg
- (\_ -> do
- prg <- getProgName
- defaultDatadir <- E.catch
- (getAppUserDataDirectory "pandoc")
- (\e -> let _ = (e :: E.SomeException)
- in return "")
- UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++
- compileInfo ++ "\nDefault user data directory: " ++
- defaultDatadir ++ copyrightMessage)
- exitSuccess ))
- "" -- "Print version"
-
- , Option "h" ["help"]
- (NoArg
- (\_ -> do
- prg <- getProgName
- UTF8.hPutStr stdout (usageMessage prg options)
- exitSuccess ))
- "" -- "Show help"
-
- ]
-
--- Returns usage message
-usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
-usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
-
-copyrightMessage :: String
-copyrightMessage = intercalate "\n" [
- "",
- "Copyright (C) 2006-2017 John MacFarlane",
- "Web: http://pandoc.org",
- "This is free software; see the source for copying conditions.",
- "There is no warranty, not even for merchantability or fitness",
- "for a particular purpose." ]
-
-compileInfo :: String
-compileInfo =
- "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++
- VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting
-
-handleUnrecognizedOption :: String -> [String] -> [String]
-handleUnrecognizedOption "--smart" =
- (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++
- "For example: pandoc -f markdown+smart -t markdown-smart.") :)
-handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
-handleUnrecognizedOption "--old-dashes" =
- ("--old-dashes has been removed. Use +old_dashes extension instead." :)
-handleUnrecognizedOption "--no-wrap" =
- ("--no-wrap has been removed. Use --wrap=none instead." :)
-handleUnrecognizedOption "--chapters" =
- ("--chapters has been removed. Use --top-level-division=chapter instead." :)
-handleUnrecognizedOption "--reference-docx" =
- ("--reference-docx has been removed. Use --reference-doc instead." :)
-handleUnrecognizedOption "--reference-odt" =
- ("--reference-odt has been removed. Use --reference-doc instead." :)
-handleUnrecognizedOption "--parse-raw" =
- (("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n") :)
-handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
-handleUnrecognizedOption x =
- (("Unknown option " ++ x ++ ".") :)
-
-uppercaseFirstLetter :: String -> String
-uppercaseFirstLetter (c:cs) = toUpper c : cs
-uppercaseFirstLetter [] = []
-
-readers'names :: [String]
-readers'names = sort (map fst (readers :: [(String, Reader PandocIO)]))
-
-writers'names :: [String]
-writers'names = sort (map fst (writers :: [(String, Writer PandocIO)]))
-
-splitField :: String -> (String, String)
-splitField s =
- case break (`elem` ":=") s of
- (k,_:v) -> (k,v)
- (k,[]) -> (k,"true")
-
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
deleted file mode 100644
index 8eb1ba663..000000000
--- a/src/Text/Pandoc/Asciify.hs
+++ /dev/null
@@ -1,422 +0,0 @@
-{-
-Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Asciify
- Copyright : Copyright (C) 2013-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Function to convert accented latin letters to their unaccented
-ascii equivalents (used in constructing HTML identifiers).
--}
-module Text.Pandoc.Asciify (toAsciiChar)
-where
-import qualified Data.Map as M
-import Data.Char (isAscii)
-
-toAsciiChar :: Char -> Maybe Char
-toAsciiChar c | isAscii c = Just c
- | otherwise = M.lookup c asciiMap
-
-asciiMap :: M.Map Char Char
-asciiMap = M.fromList
- [('\192','A')
- ,('\193','A')
- ,('\194','A')
- ,('\195','A')
- ,('\196','A')
- ,('\197','A')
- ,('\199','C')
- ,('\200','E')
- ,('\201','E')
- ,('\202','E')
- ,('\203','E')
- ,('\204','I')
- ,('\205','I')
- ,('\206','I')
- ,('\207','I')
- ,('\209','N')
- ,('\210','O')
- ,('\211','O')
- ,('\212','O')
- ,('\213','O')
- ,('\214','O')
- ,('\217','U')
- ,('\218','U')
- ,('\219','U')
- ,('\220','U')
- ,('\221','Y')
- ,('\224','a')
- ,('\225','a')
- ,('\226','a')
- ,('\227','a')
- ,('\228','a')
- ,('\229','a')
- ,('\231','c')
- ,('\232','e')
- ,('\233','e')
- ,('\234','e')
- ,('\235','e')
- ,('\236','i')
- ,('\237','i')
- ,('\238','i')
- ,('\239','i')
- ,('\241','n')
- ,('\242','o')
- ,('\243','o')
- ,('\244','o')
- ,('\245','o')
- ,('\246','o')
- ,('\249','u')
- ,('\250','u')
- ,('\251','u')
- ,('\252','u')
- ,('\253','y')
- ,('\255','y')
- ,('\256','A')
- ,('\257','a')
- ,('\258','A')
- ,('\259','a')
- ,('\260','A')
- ,('\261','a')
- ,('\262','C')
- ,('\263','c')
- ,('\264','C')
- ,('\265','c')
- ,('\266','C')
- ,('\267','c')
- ,('\268','C')
- ,('\269','c')
- ,('\270','D')
- ,('\271','d')
- ,('\274','E')
- ,('\275','e')
- ,('\276','E')
- ,('\277','e')
- ,('\278','E')
- ,('\279','e')
- ,('\280','E')
- ,('\281','e')
- ,('\282','E')
- ,('\283','e')
- ,('\284','G')
- ,('\285','g')
- ,('\286','G')
- ,('\287','g')
- ,('\288','G')
- ,('\289','g')
- ,('\290','G')
- ,('\291','g')
- ,('\292','H')
- ,('\293','h')
- ,('\296','I')
- ,('\297','i')
- ,('\298','I')
- ,('\299','i')
- ,('\300','I')
- ,('\301','i')
- ,('\302','I')
- ,('\303','i')
- ,('\304','I')
- ,('\308','J')
- ,('\309','j')
- ,('\310','K')
- ,('\311','k')
- ,('\313','L')
- ,('\314','l')
- ,('\315','L')
- ,('\316','l')
- ,('\317','L')
- ,('\318','l')
- ,('\323','N')
- ,('\324','n')
- ,('\325','N')
- ,('\326','n')
- ,('\327','N')
- ,('\328','n')
- ,('\332','O')
- ,('\333','o')
- ,('\334','O')
- ,('\335','o')
- ,('\336','O')
- ,('\337','o')
- ,('\340','R')
- ,('\341','r')
- ,('\342','R')
- ,('\343','r')
- ,('\344','R')
- ,('\345','r')
- ,('\346','S')
- ,('\347','s')
- ,('\348','S')
- ,('\349','s')
- ,('\350','S')
- ,('\351','s')
- ,('\352','S')
- ,('\353','s')
- ,('\354','T')
- ,('\355','t')
- ,('\356','T')
- ,('\357','t')
- ,('\360','U')
- ,('\361','u')
- ,('\362','U')
- ,('\363','u')
- ,('\364','U')
- ,('\365','u')
- ,('\366','U')
- ,('\367','u')
- ,('\368','U')
- ,('\369','u')
- ,('\370','U')
- ,('\371','u')
- ,('\372','W')
- ,('\373','w')
- ,('\374','Y')
- ,('\375','y')
- ,('\376','Y')
- ,('\377','Z')
- ,('\378','z')
- ,('\379','Z')
- ,('\380','z')
- ,('\381','Z')
- ,('\382','z')
- ,('\416','O')
- ,('\417','o')
- ,('\431','U')
- ,('\432','u')
- ,('\461','A')
- ,('\462','a')
- ,('\463','I')
- ,('\464','i')
- ,('\465','O')
- ,('\466','o')
- ,('\467','U')
- ,('\468','u')
- ,('\486','G')
- ,('\487','g')
- ,('\488','K')
- ,('\489','k')
- ,('\490','O')
- ,('\491','o')
- ,('\496','j')
- ,('\500','G')
- ,('\501','g')
- ,('\504','N')
- ,('\505','n')
- ,('\512','A')
- ,('\513','a')
- ,('\514','A')
- ,('\515','a')
- ,('\516','E')
- ,('\517','e')
- ,('\518','E')
- ,('\519','e')
- ,('\520','I')
- ,('\521','i')
- ,('\522','I')
- ,('\523','i')
- ,('\524','O')
- ,('\525','o')
- ,('\526','O')
- ,('\527','o')
- ,('\528','R')
- ,('\529','r')
- ,('\530','R')
- ,('\531','r')
- ,('\532','U')
- ,('\533','u')
- ,('\534','U')
- ,('\535','u')
- ,('\536','S')
- ,('\537','s')
- ,('\538','T')
- ,('\539','t')
- ,('\542','H')
- ,('\543','h')
- ,('\550','A')
- ,('\551','a')
- ,('\552','E')
- ,('\553','e')
- ,('\558','O')
- ,('\559','o')
- ,('\562','Y')
- ,('\563','y')
- ,('\894',';')
- ,('\7680','A')
- ,('\7681','a')
- ,('\7682','B')
- ,('\7683','b')
- ,('\7684','B')
- ,('\7685','b')
- ,('\7686','B')
- ,('\7687','b')
- ,('\7690','D')
- ,('\7691','d')
- ,('\7692','D')
- ,('\7693','d')
- ,('\7694','D')
- ,('\7695','d')
- ,('\7696','D')
- ,('\7697','d')
- ,('\7698','D')
- ,('\7699','d')
- ,('\7704','E')
- ,('\7705','e')
- ,('\7706','E')
- ,('\7707','e')
- ,('\7710','F')
- ,('\7711','f')
- ,('\7712','G')
- ,('\7713','g')
- ,('\7714','H')
- ,('\7715','h')
- ,('\7716','H')
- ,('\7717','h')
- ,('\7718','H')
- ,('\7719','h')
- ,('\7720','H')
- ,('\7721','h')
- ,('\7722','H')
- ,('\7723','h')
- ,('\7724','I')
- ,('\7725','i')
- ,('\7728','K')
- ,('\7729','k')
- ,('\7730','K')
- ,('\7731','k')
- ,('\7732','K')
- ,('\7733','k')
- ,('\7734','L')
- ,('\7735','l')
- ,('\7738','L')
- ,('\7739','l')
- ,('\7740','L')
- ,('\7741','l')
- ,('\7742','M')
- ,('\7743','m')
- ,('\7744','M')
- ,('\7745','m')
- ,('\7746','M')
- ,('\7747','m')
- ,('\7748','N')
- ,('\7749','n')
- ,('\7750','N')
- ,('\7751','n')
- ,('\7752','N')
- ,('\7753','n')
- ,('\7754','N')
- ,('\7755','n')
- ,('\7764','P')
- ,('\7765','p')
- ,('\7766','P')
- ,('\7767','p')
- ,('\7768','R')
- ,('\7769','r')
- ,('\7770','R')
- ,('\7771','r')
- ,('\7774','R')
- ,('\7775','r')
- ,('\7776','S')
- ,('\7777','s')
- ,('\7778','S')
- ,('\7779','s')
- ,('\7786','T')
- ,('\7787','t')
- ,('\7788','T')
- ,('\7789','t')
- ,('\7790','T')
- ,('\7791','t')
- ,('\7792','T')
- ,('\7793','t')
- ,('\7794','U')
- ,('\7795','u')
- ,('\7796','U')
- ,('\7797','u')
- ,('\7798','U')
- ,('\7799','u')
- ,('\7804','V')
- ,('\7805','v')
- ,('\7806','V')
- ,('\7807','v')
- ,('\7808','W')
- ,('\7809','w')
- ,('\7810','W')
- ,('\7811','w')
- ,('\7812','W')
- ,('\7813','w')
- ,('\7814','W')
- ,('\7815','w')
- ,('\7816','W')
- ,('\7817','w')
- ,('\7818','X')
- ,('\7819','x')
- ,('\7820','X')
- ,('\7821','x')
- ,('\7822','Y')
- ,('\7823','y')
- ,('\7824','Z')
- ,('\7825','z')
- ,('\7826','Z')
- ,('\7827','z')
- ,('\7828','Z')
- ,('\7829','z')
- ,('\7830','h')
- ,('\7831','t')
- ,('\7832','w')
- ,('\7833','y')
- ,('\7840','A')
- ,('\7841','a')
- ,('\7842','A')
- ,('\7843','a')
- ,('\7864','E')
- ,('\7865','e')
- ,('\7866','E')
- ,('\7867','e')
- ,('\7868','E')
- ,('\7869','e')
- ,('\7880','I')
- ,('\7881','i')
- ,('\7882','I')
- ,('\7883','i')
- ,('\7884','O')
- ,('\7885','o')
- ,('\7886','O')
- ,('\7887','o')
- ,('\7908','U')
- ,('\7909','u')
- ,('\7910','U')
- ,('\7911','u')
- ,('\7922','Y')
- ,('\7923','y')
- ,('\7924','Y')
- ,('\7925','y')
- ,('\7926','Y')
- ,('\7927','y')
- ,('\7928','Y')
- ,('\7929','y')
- ,('\8175','`')
- ,('\8490','K')
- ,('\8800','=')
- ,('\8814','<')
- ,('\8815','>')
- ]
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
deleted file mode 100644
index f479ed9d0..000000000
--- a/src/Text/Pandoc/CSS.hs
+++ /dev/null
@@ -1,43 +0,0 @@
-module Text.Pandoc.CSS ( foldOrElse
- , pickStyleAttrProps
- , pickStylesToKVs
- )
-where
-
-import Text.Pandoc.Shared (trim)
-import Text.Parsec
-import Text.Parsec.String
-
-ruleParser :: Parser (String, String)
-ruleParser = do
- p <- many1 (noneOf ":") <* char ':'
- v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces
- return (trim p, trim v)
-
-styleAttrParser :: Parser [(String, String)]
-styleAttrParser = many1 ruleParser
-
-orElse :: Eq a => a -> a -> a -> a
-orElse v x y = if v == x then y else x
-
-foldOrElse :: Eq a => a -> [a] -> a
-foldOrElse v xs = foldr (orElse v) v xs
-
-eitherToMaybe :: Either a b -> Maybe b
-eitherToMaybe (Right x) = Just x
-eitherToMaybe _ = Nothing
-
--- | takes a list of keys/properties and a CSS string and
--- returns the corresponding key-value-pairs.
-pickStylesToKVs :: [String] -> String -> [(String, String)]
-pickStylesToKVs props styleAttr =
- case parse styleAttrParser "" styleAttr of
- Left _ -> []
- Right styles -> filter (\s -> fst s `elem` props) styles
-
--- | takes a list of key/property synonyms and a CSS string and maybe
--- returns the value of the first match (in order of the supplied list)
-pickStyleAttrProps :: [String] -> String -> Maybe String
-pickStyleAttrProps lookupProps styleAttr = do
- styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
- foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
deleted file mode 100644
index fb148666c..000000000
--- a/src/Text/Pandoc/Class.hs
+++ /dev/null
@@ -1,539 +0,0 @@
-{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances,
-FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts #-}
-
-{-
-Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Class
- Copyright : Copyright (C) 2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Typeclass for pandoc readers and writers, allowing both IO and pure instances.
--}
-
-module Text.Pandoc.Class ( PandocMonad(..)
- , CommonState(..)
- , PureState(..)
- , getPureState
- , getsPureState
- , putPureState
- , modifyPureState
- , getPOSIXTime
- , getZonedTime
- , readFileFromDirs
- , report
- , getLog
- , setVerbosity
- , getMediaBag
- , setMediaBag
- , insertMedia
- , fetchItem
- , getInputFiles
- , getOutputFile
- , PandocIO(..)
- , PandocPure(..)
- , FileTree(..)
- , FileInfo(..)
- , runIO
- , runIOorExplode
- , runPure
- , withMediaBag
- ) where
-
-import Prelude hiding (readFile)
-import System.Random (StdGen, next, mkStdGen)
-import qualified System.Random as IO (newStdGen)
-import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
-import Data.Unique (hashUnique)
-import qualified Data.Unique as IO (newUnique)
-import qualified Text.Pandoc.Shared as IO ( readDataFile
- , openURL )
-import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Compat.Time (UTCTime)
-import Text.Pandoc.Logging
-import Text.Parsec (ParsecT)
-import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
-import Text.Pandoc.MIME (MimeType, getMimeType)
-import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
- , posixSecondsToUTCTime
- , POSIXTime )
-import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
-import Network.URI ( escapeURIString, nonStrictRelativeTo,
- unEscapeString, parseURIReference, isAllowedInURI,
- parseURI, URI(..) )
-import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import qualified Text.Pandoc.MediaBag as MB
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified System.Environment as IO (lookupEnv)
-import System.FilePath.Glob (match, compile)
-import System.FilePath ((</>), takeExtension, dropExtension)
-import qualified System.FilePath.Glob as IO (glob)
-import qualified System.Directory as IO (getModificationTime)
-import Control.Monad as M (fail)
-import Control.Monad.Reader (ReaderT)
-import Control.Monad.State
-import Control.Monad.Except
-import Control.Monad.Writer (WriterT)
-import Control.Monad.RWS (RWST)
-import Data.Word (Word8)
-import Data.Default
-import System.IO.Error
-import System.IO (stderr)
-import qualified Data.Map as M
-import Text.Pandoc.Error
-import Text.Printf (printf)
-
-class (Functor m, Applicative m, Monad m, MonadError PandocError m)
- => PandocMonad m where
- lookupEnv :: String -> m (Maybe String)
- getCurrentTime :: m UTCTime
- getCurrentTimeZone :: m TimeZone
- newStdGen :: m StdGen
- newUniqueHash :: m Int
- openURL :: String -> m (B.ByteString, Maybe MimeType)
- readFileLazy :: FilePath -> m BL.ByteString
- readFileStrict :: FilePath -> m B.ByteString
- readDataFile :: Maybe FilePath
- -> FilePath
- -> m B.ByteString
- glob :: String -> m [FilePath]
- getModificationTime :: FilePath -> m UTCTime
- getCommonState :: m CommonState
- putCommonState :: CommonState -> m ()
-
- getsCommonState :: (CommonState -> a) -> m a
- getsCommonState f = f <$> getCommonState
-
- modifyCommonState :: (CommonState -> CommonState) -> m ()
- modifyCommonState f = getCommonState >>= putCommonState . f
-
- logOutput :: LogMessage -> m ()
-
--- Functions defined for all PandocMonad instances
-
-setVerbosity :: PandocMonad m => Verbosity -> m ()
-setVerbosity verbosity =
- modifyCommonState $ \st -> st{ stVerbosity = verbosity }
-
-getLog :: PandocMonad m => m [LogMessage]
-getLog = reverse <$> getsCommonState stLog
-
-report :: PandocMonad m => LogMessage -> m ()
-report msg = do
- verbosity <- getsCommonState stVerbosity
- let level = messageVerbosity msg
- when (level <= verbosity) $ do
- logOutput msg
- unless (level == DEBUG) $
- modifyCommonState $ \st -> st{ stLog = msg : stLog st }
-
-setMediaBag :: PandocMonad m => MediaBag -> m ()
-setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
-
-getMediaBag :: PandocMonad m => m MediaBag
-getMediaBag = getsCommonState stMediaBag
-
-insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
-insertMedia fp mime bs = do
- mb <- getsCommonState stMediaBag
- let mb' = MB.insertMedia fp mime bs mb
- modifyCommonState $ \st -> st{stMediaBag = mb' }
-
-getInputFiles :: PandocMonad m => m (Maybe [FilePath])
-getInputFiles = getsCommonState stInputFiles
-
-getOutputFile :: PandocMonad m => m (Maybe FilePath)
-getOutputFile = getsCommonState stOutputFile
-
-getPOSIXTime :: (PandocMonad m) => m POSIXTime
-getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
-
-getZonedTime :: (PandocMonad m) => m ZonedTime
-getZonedTime = do
- t <- getCurrentTime
- tz <- getCurrentTimeZone
- return $ utcToZonedTime tz t
-
--- | Read file, checking in any number of directories.
-readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
-readFileFromDirs [] _ = return Nothing
-readFileFromDirs (d:ds) f = catchError
- ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
- (\_ -> readFileFromDirs ds f)
-
---
-
-data CommonState = CommonState { stLog :: [LogMessage]
- , stMediaBag :: MediaBag
- , stInputFiles :: Maybe [FilePath]
- , stOutputFile :: Maybe FilePath
- , stVerbosity :: Verbosity
- }
-
-instance Default CommonState where
- def = CommonState { stLog = []
- , stMediaBag = mempty
- , stInputFiles = Nothing
- , stOutputFile = Nothing
- , stVerbosity = WARNING
- }
-
-runIO :: PandocIO a -> IO (Either PandocError a)
-runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-
-withMediaBag :: PandocMonad m => m a -> m (a, MediaBag)
-withMediaBag ma = ((,)) <$> ma <*> getMediaBag
-
-runIOorExplode :: PandocIO a -> IO a
-runIOorExplode ma = runIO ma >>= handleError
-
-newtype PandocIO a = PandocIO {
- unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
- } deriving ( MonadIO
- , Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
-instance PandocMonad PandocIO where
- lookupEnv = liftIO . IO.lookupEnv
- getCurrentTime = liftIO IO.getCurrentTime
- getCurrentTimeZone = liftIO IO.getCurrentTimeZone
- newStdGen = liftIO IO.newStdGen
- newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
- openURL u = do
- eitherRes <- liftIO $ (tryIOError $ IO.openURL u)
- case eitherRes of
- Right (Right res) -> return res
- Right (Left _) -> throwError $ PandocFileReadError u
- Left _ -> throwError $ PandocFileReadError u
- readFileLazy s = do
- eitherBS <- liftIO (tryIOError $ BL.readFile s)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError s
- readFileStrict s = do
- eitherBS <- liftIO (tryIOError $ B.readFile s)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError s
- -- TODO: Make this more sensitive to the different sorts of failure
- readDataFile mfp fname = do
- eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname)
- case eitherBS of
- Right bs -> return bs
- Left _ -> throwError $ PandocFileReadError fname
- glob = liftIO . IO.glob
- getModificationTime fp = do
- eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp)
- case eitherMtime of
- Right mtime -> return mtime
- Left _ -> throwError $ PandocFileReadError fp
- getCommonState = PandocIO $ lift get
- putCommonState x = PandocIO $ lift $ put x
- logOutput msg =
- liftIO $ UTF8.hPutStrLn stderr $ printf "%-7s %s"
- (show (messageVerbosity msg)) (showLogMessage msg)
-
--- | Specialized version of parseURIReference that disallows
--- single-letter schemes. Reason: these are usually windows absolute
--- paths.
-parseURIReference' :: String -> Maybe URI
-parseURIReference' s =
- case parseURIReference s of
- Just u
- | length (uriScheme u) > 2 -> Just u
- | null (uriScheme u) -> Just u -- protocol-relative
- _ -> Nothing
-
--- | Fetch an image or other item from the local filesystem or the net.
--- Returns raw content and maybe mime type.
-fetchItem :: PandocMonad m
- => Maybe String
- -> String
- -> m (B.ByteString, Maybe MimeType)
-fetchItem sourceURL s = do
- mediabag <- getMediaBag
- case lookupMedia s mediabag of
- Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
- Nothing -> downloadOrRead sourceURL s
-
-downloadOrRead :: PandocMonad m
- => Maybe String
- -> String
- -> m (B.ByteString, Maybe MimeType)
-downloadOrRead sourceURL s = do
- case (sourceURL >>= parseURIReference' .
- ensureEscaped, ensureEscaped s) of
- (Just u, s') -> -- try fetching from relative path at source
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
- Nothing -> openURL s' -- will throw error
- (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
- case parseURIReference' s' of
- Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
- Nothing -> openURL s' -- will throw error
- (Nothing, s') ->
- case parseURI s' of -- requires absolute URI
- -- We don't want to treat C:/ as a scheme:
- Just u' | length (uriScheme u') > 2 -> openURL (show u')
- Just u' | uriScheme u' == "file:" ->
- readLocalFile $ dropWhile (=='/') (uriPath u')
- _ -> readLocalFile fp -- get from local file system
- where readLocalFile f = do
- cont <- readFileStrict f
- return (cont, mime)
- httpcolon = URI{ uriScheme = "http:",
- uriAuthority = Nothing,
- uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
- fp = unEscapeString $ dropFragmentAndQuery s
- mime = case takeExtension fp of
- ".gz" -> getMimeType $ dropExtension fp
- ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
- x -> getMimeType x
- ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
- convertSlash '\\' = '/'
- convertSlash x = x
-
-data PureState = PureState { stStdGen :: StdGen
- , stWord8Store :: [Word8] -- should be
- -- inifinite,
- -- i.e. [1..]
- , stUniqStore :: [Int] -- should be
- -- inifinite and
- -- contain every
- -- element at most
- -- once, e.g. [1..]
- , stEnv :: [(String, String)]
- , stTime :: UTCTime
- , stTimeZone :: TimeZone
- , stReferenceDocx :: Archive
- , stReferenceODT :: Archive
- , stFiles :: FileTree
- , stUserDataDir :: FileTree
- , stCabalDataDir :: FileTree
- , stFontFiles :: [FilePath]
- }
-
-instance Default PureState where
- def = PureState { stStdGen = mkStdGen 1848
- , stWord8Store = [1..]
- , stUniqStore = [1..]
- , stEnv = [("USER", "pandoc-user")]
- , stTime = posixSecondsToUTCTime 0
- , stTimeZone = utc
- , stReferenceDocx = emptyArchive
- , stReferenceODT = emptyArchive
- , stFiles = mempty
- , stUserDataDir = mempty
- , stCabalDataDir = mempty
- , stFontFiles = []
- }
-
-
-getPureState :: PandocPure PureState
-getPureState = PandocPure $ lift $ lift $ get
-
-getsPureState :: (PureState -> a) -> PandocPure a
-getsPureState f = f <$> getPureState
-
-putPureState :: PureState -> PandocPure ()
-putPureState ps= PandocPure $ lift $ lift $ put ps
-
-modifyPureState :: (PureState -> PureState) -> PandocPure ()
-modifyPureState f = PandocPure $ lift $ lift $ modify f
-
-
-data FileInfo = FileInfo { infoFileMTime :: UTCTime
- , infoFileContents :: B.ByteString
- }
-
-newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
- deriving (Monoid)
-
-getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
-getFileInfo fp tree = M.lookup fp $ unFileTree tree
-
-
-newtype PandocPure a = PandocPure {
- unPandocPure :: ExceptT PandocError
- (StateT CommonState (State PureState)) a
- } deriving ( Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
-runPure :: PandocPure a -> Either PandocError a
-runPure x = flip evalState def $
- flip evalStateT def $
- runExceptT $
- unPandocPure x
-
-instance PandocMonad PandocPure where
- lookupEnv s = do
- env <- getsPureState stEnv
- return (lookup s env)
-
- getCurrentTime = getsPureState stTime
-
- getCurrentTimeZone = getsPureState stTimeZone
-
- newStdGen = do
- g <- getsPureState stStdGen
- let (_, nxtGen) = next g
- modifyPureState $ \st -> st { stStdGen = nxtGen }
- return g
-
- newUniqueHash = do
- uniqs <- getsPureState stUniqStore
- case uniqs of
- u : us -> do
- modifyPureState $ \st -> st { stUniqStore = us }
- return u
- _ -> M.fail "uniq store ran out of elements"
- openURL _ = throwError $ PandocSomeError "Cannot open URL in PandocPure"
- readFileLazy fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return (BL.fromStrict bs)
- Nothing -> throwError $ PandocFileReadError fp
- readFileStrict fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return bs
- Nothing -> throwError $ PandocFileReadError fp
- readDataFile Nothing "reference.docx" = do
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
- readDataFile Nothing "reference.odt" = do
- (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
- readDataFile Nothing fname = do
- let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
- readFileStrict fname'
- readDataFile (Just userDir) fname = do
- userDirFiles <- getsPureState stUserDataDir
- case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
- Just bs -> return bs
- Nothing -> readDataFile Nothing fname
-
- glob s = do
- fontFiles <- getsPureState stFontFiles
- return (filter (match (compile s)) fontFiles)
-
- getModificationTime fp = do
- fps <- getsPureState stFiles
- case infoFileMTime <$> (getFileInfo fp fps) of
- Just tm -> return tm
- Nothing -> throwError $ PandocFileReadError fp
-
- getCommonState = PandocPure $ lift $ get
- putCommonState x = PandocPure $ lift $ put x
-
- logOutput _msg = return ()
-
-instance PandocMonad m => PandocMonad (ParsecT s st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance PandocMonad m => PandocMonad (ReaderT r m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
-instance PandocMonad m => PandocMonad (StateT st m) where
- lookupEnv = lift . lookupEnv
- getCurrentTime = lift getCurrentTime
- getCurrentTimeZone = lift getCurrentTimeZone
- newStdGen = lift newStdGen
- newUniqueHash = lift newUniqueHash
- openURL = lift . openURL
- readFileLazy = lift . readFileLazy
- readFileStrict = lift . readFileStrict
- readDataFile mbuserdir = lift . readDataFile mbuserdir
- glob = lift . glob
- getModificationTime = lift . getModificationTime
- getCommonState = lift getCommonState
- putCommonState = lift . putCommonState
- logOutput = lift . logOutput
-
diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs
deleted file mode 100644
index b1cde82a4..000000000
--- a/src/Text/Pandoc/Compat/Time.hs
+++ /dev/null
@@ -1,30 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-{-
-This compatibility module is needed because, in time 1.5, the
-`defaultTimeLocale` function was moved from System.Locale (in the
-old-locale library) into Data.Time.
-
-We support both behaviors because time 1.4 is a boot library for GHC
-7.8. time 1.5 is a boot library for GHC 7.10.
-
-When support is dropped for GHC 7.8, this module may be obsoleted.
--}
-
-#if MIN_VERSION_time(1,5,0)
-module Text.Pandoc.Compat.Time (
- module Data.Time
-)
-where
-import Data.Time
-
-#else
-module Text.Pandoc.Compat.Time (
- module Data.Time,
- defaultTimeLocale
-)
-where
-import Data.Time
-import System.Locale ( defaultTimeLocale )
-
-#endif
diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hsb
deleted file mode 100644
index 8786647c5..000000000
--- a/src/Text/Pandoc/Data.hsb
+++ /dev/null
@@ -1,15 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
--- to be processed using hsb2hs
-module Text.Pandoc.Data (dataFiles) where
-import qualified Data.ByteString as B
-import System.FilePath (splitDirectories)
-import qualified System.FilePath.Posix as Posix
-
--- We ensure that the data files are stored using Posix
--- path separators (/), even on Windows.
-dataFiles :: [(FilePath, B.ByteString)]
-dataFiles = map (\(fp, contents) ->
- (Posix.joinPath (splitDirectories fp), contents)) dataFiles'
-
-dataFiles' :: [(FilePath, B.ByteString)]
-dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data"
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs
deleted file mode 100644
index c9f368abc..000000000
--- a/src/Text/Pandoc/Emoji.hs
+++ /dev/null
@@ -1,906 +0,0 @@
-{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Emoji
- Copyright : Copyright (C) 2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Emoji symbol lookup from canonical string identifier.
--}
-module Text.Pandoc.Emoji ( emojis ) where
-import qualified Data.Map as M
-
-emojis :: M.Map String String
-emojis = M.fromList
- [("+1","\128077")
- ,("-1","\128078")
- ,("100","\128175")
- ,("1234","\128290")
- ,("8ball","\127921")
- ,("a","\127344\65039")
- ,("ab","\127374")
- ,("abc","\128292")
- ,("abcd","\128289")
- ,("accept","\127569")
- ,("aerial_tramway","\128673")
- ,("airplane","\9992\65039")
- ,("alarm_clock","\9200")
- ,("alien","\128125")
- ,("ambulance","\128657")
- ,("anchor","\9875")
- ,("angel","\128124")
- ,("anger","\128162")
- ,("angry","\128544")
- ,("anguished","\128551")
- ,("ant","\128028")
- ,("apple","\127822")
- ,("aquarius","\9810")
- ,("aries","\9800")
- ,("arrow_backward","\9664\65039")
- ,("arrow_double_down","\9196")
- ,("arrow_double_up","\9195")
- ,("arrow_down","\11015\65039")
- ,("arrow_down_small","\128317")
- ,("arrow_forward","\9654\65039")
- ,("arrow_heading_down","\10549\65039")
- ,("arrow_heading_up","\10548\65039")
- ,("arrow_left","\11013\65039")
- ,("arrow_lower_left","\8601\65039")
- ,("arrow_lower_right","\8600\65039")
- ,("arrow_right","\10145\65039")
- ,("arrow_right_hook","\8618\65039")
- ,("arrow_up","\11014\65039")
- ,("arrow_up_down","\8597\65039")
- ,("arrow_up_small","\128316")
- ,("arrow_upper_left","\8598\65039")
- ,("arrow_upper_right","\8599\65039")
- ,("arrows_clockwise","\128259")
- ,("arrows_counterclockwise","\128260")
- ,("art","\127912")
- ,("articulated_lorry","\128667")
- ,("astonished","\128562")
- ,("athletic_shoe","\128095")
- ,("atm","\127975")
- ,("b","\127345\65039")
- ,("baby","\128118")
- ,("baby_bottle","\127868")
- ,("baby_chick","\128036")
- ,("baby_symbol","\128700")
- ,("back","\128281")
- ,("baggage_claim","\128708")
- ,("balloon","\127880")
- ,("ballot_box_with_check","\9745\65039")
- ,("bamboo","\127885")
- ,("banana","\127820")
- ,("bangbang","\8252\65039")
- ,("bank","\127974")
- ,("bar_chart","\128202")
- ,("barber","\128136")
- ,("baseball","\9918\65039")
- ,("basketball","\127936")
- ,("bath","\128704")
- ,("bathtub","\128705")
- ,("battery","\128267")
- ,("bear","\128059")
- ,("bee","\128029")
- ,("beer","\127866")
- ,("beers","\127867")
- ,("beetle","\128030")
- ,("beginner","\128304")
- ,("bell","\128276")
- ,("bento","\127857")
- ,("bicyclist","\128692")
- ,("bike","\128690")
- ,("bikini","\128089")
- ,("bird","\128038")
- ,("birthday","\127874")
- ,("black_circle","\9899")
- ,("black_joker","\127183")
- ,("black_large_square","\11035")
- ,("black_medium_small_square","\9726")
- ,("black_medium_square","\9724\65039")
- ,("black_nib","\10002\65039")
- ,("black_small_square","\9642\65039")
- ,("black_square_button","\128306")
- ,("blossom","\127804")
- ,("blowfish","\128033")
- ,("blue_book","\128216")
- ,("blue_car","\128665")
- ,("blue_heart","\128153")
- ,("blush","\128522")
- ,("boar","\128023")
- ,("boat","\9973")
- ,("bomb","\128163")
- ,("book","\128214")
- ,("bookmark","\128278")
- ,("bookmark_tabs","\128209")
- ,("books","\128218")
- ,("boom","\128165")
- ,("boot","\128098")
- ,("bouquet","\128144")
- ,("bow","\128583")
- ,("bowling","\127923")
- ,("boy","\128102")
- ,("bread","\127838")
- ,("bride_with_veil","\128112")
- ,("bridge_at_night","\127753")
- ,("briefcase","\128188")
- ,("broken_heart","\128148")
- ,("bug","\128027")
- ,("bulb","\128161")
- ,("bullettrain_front","\128645")
- ,("bullettrain_side","\128644")
- ,("bus","\128652")
- ,("busstop","\128655")
- ,("bust_in_silhouette","\128100")
- ,("busts_in_silhouette","\128101")
- ,("cactus","\127797")
- ,("cake","\127856")
- ,("calendar","\128198")
- ,("calling","\128242")
- ,("camel","\128043")
- ,("camera","\128247")
- ,("cancer","\9803")
- ,("candy","\127852")
- ,("capital_abcd","\128288")
- ,("capricorn","\9809")
- ,("car","\128663")
- ,("card_index","\128199")
- ,("carousel_horse","\127904")
- ,("cat","\128049")
- ,("cat2","\128008")
- ,("cd","\128191")
- ,("chart","\128185")
- ,("chart_with_downwards_trend","\128201")
- ,("chart_with_upwards_trend","\128200")
- ,("checkered_flag","\127937")
- ,("cherries","\127826")
- ,("cherry_blossom","\127800")
- ,("chestnut","\127792")
- ,("chicken","\128020")
- ,("children_crossing","\128696")
- ,("chocolate_bar","\127851")
- ,("christmas_tree","\127876")
- ,("church","\9962")
- ,("cinema","\127910")
- ,("circus_tent","\127914")
- ,("city_sunrise","\127751")
- ,("city_sunset","\127750")
- ,("cl","\127377")
- ,("clap","\128079")
- ,("clapper","\127916")
- ,("clipboard","\128203")
- ,("clock1","\128336")
- ,("clock10","\128345")
- ,("clock1030","\128357")
- ,("clock11","\128346")
- ,("clock1130","\128358")
- ,("clock12","\128347")
- ,("clock1230","\128359")
- ,("clock130","\128348")
- ,("clock2","\128337")
- ,("clock230","\128349")
- ,("clock3","\128338")
- ,("clock330","\128350")
- ,("clock4","\128339")
- ,("clock430","\128351")
- ,("clock5","\128340")
- ,("clock530","\128352")
- ,("clock6","\128341")
- ,("clock630","\128353")
- ,("clock7","\128342")
- ,("clock730","\128354")
- ,("clock8","\128343")
- ,("clock830","\128355")
- ,("clock9","\128344")
- ,("clock930","\128356")
- ,("closed_book","\128213")
- ,("closed_lock_with_key","\128272")
- ,("closed_umbrella","\127746")
- ,("cloud","\9729\65039")
- ,("clubs","\9827\65039")
- ,("cn","\127464\127475")
- ,("cocktail","\127864")
- ,("coffee","\9749")
- ,("cold_sweat","\128560")
- ,("collision","\128165")
- ,("computer","\128187")
- ,("confetti_ball","\127882")
- ,("confounded","\128534")
- ,("confused","\128533")
- ,("congratulations","\12951\65039")
- ,("construction","\128679")
- ,("construction_worker","\128119")
- ,("convenience_store","\127978")
- ,("cookie","\127850")
- ,("cool","\127378")
- ,("cop","\128110")
- ,("copyright","\169\65039")
- ,("corn","\127805")
- ,("couple","\128107")
- ,("couple_with_heart","\128145")
- ,("couplekiss","\128143")
- ,("cow","\128046")
- ,("cow2","\128004")
- ,("credit_card","\128179")
- ,("crescent_moon","\127769")
- ,("crocodile","\128010")
- ,("crossed_flags","\127884")
- ,("crown","\128081")
- ,("cry","\128546")
- ,("crying_cat_face","\128575")
- ,("crystal_ball","\128302")
- ,("cupid","\128152")
- ,("curly_loop","\10160")
- ,("currency_exchange","\128177")
- ,("curry","\127835")
- ,("custard","\127854")
- ,("customs","\128707")
- ,("cyclone","\127744")
- ,("dancer","\128131")
- ,("dancers","\128111")
- ,("dango","\127841")
- ,("dart","\127919")
- ,("dash","\128168")
- ,("date","\128197")
- ,("de","\127465\127466")
- ,("deciduous_tree","\127795")
- ,("department_store","\127980")
- ,("diamond_shape_with_a_dot_inside","\128160")
- ,("diamonds","\9830\65039")
- ,("disappointed","\128542")
- ,("disappointed_relieved","\128549")
- ,("dizzy","\128171")
- ,("dizzy_face","\128565")
- ,("do_not_litter","\128687")
- ,("dog","\128054")
- ,("dog2","\128021")
- ,("dollar","\128181")
- ,("dolls","\127886")
- ,("dolphin","\128044")
- ,("door","\128682")
- ,("doughnut","\127849")
- ,("dragon","\128009")
- ,("dragon_face","\128050")
- ,("dress","\128087")
- ,("dromedary_camel","\128042")
- ,("droplet","\128167")
- ,("dvd","\128192")
- ,("e-mail","\128231")
- ,("ear","\128066")
- ,("ear_of_rice","\127806")
- ,("earth_africa","\127757")
- ,("earth_americas","\127758")
- ,("earth_asia","\127759")
- ,("egg","\127859")
- ,("eggplant","\127814")
- ,("eight","8\65039\8419")
- ,("eight_pointed_black_star","\10036\65039")
- ,("eight_spoked_asterisk","\10035\65039")
- ,("electric_plug","\128268")
- ,("elephant","\128024")
- ,("email","\9993\65039")
- ,("end","\128282")
- ,("envelope","\9993\65039")
- ,("envelope_with_arrow","\128233")
- ,("es","\127466\127480")
- ,("euro","\128182")
- ,("european_castle","\127984")
- ,("european_post_office","\127972")
- ,("evergreen_tree","\127794")
- ,("exclamation","\10071")
- ,("expressionless","\128529")
- ,("eyeglasses","\128083")
- ,("eyes","\128064")
- ,("facepunch","\128074")
- ,("factory","\127981")
- ,("fallen_leaf","\127810")
- ,("family","\128106")
- ,("fast_forward","\9193")
- ,("fax","\128224")
- ,("fearful","\128552")
- ,("feet","\128062")
- ,("ferris_wheel","\127905")
- ,("file_folder","\128193")
- ,("fire","\128293")
- ,("fire_engine","\128658")
- ,("fireworks","\127878")
- ,("first_quarter_moon","\127763")
- ,("first_quarter_moon_with_face","\127771")
- ,("fish","\128031")
- ,("fish_cake","\127845")
- ,("fishing_pole_and_fish","\127907")
- ,("fist","\9994")
- ,("five","5\65039\8419")
- ,("flags","\127887")
- ,("flashlight","\128294")
- ,("flipper","\128044")
- ,("floppy_disk","\128190")
- ,("flower_playing_cards","\127924")
- ,("flushed","\128563")
- ,("foggy","\127745")
- ,("football","\127944")
- ,("footprints","\128099")
- ,("fork_and_knife","\127860")
- ,("fountain","\9970")
- ,("four","4\65039\8419")
- ,("four_leaf_clover","\127808")
- ,("fr","\127467\127479")
- ,("free","\127379")
- ,("fried_shrimp","\127844")
- ,("fries","\127839")
- ,("frog","\128056")
- ,("frowning","\128550")
- ,("fuelpump","\9981")
- ,("full_moon","\127765")
- ,("full_moon_with_face","\127773")
- ,("game_die","\127922")
- ,("gb","\127468\127463")
- ,("gem","\128142")
- ,("gemini","\9802")
- ,("ghost","\128123")
- ,("gift","\127873")
- ,("gift_heart","\128157")
- ,("girl","\128103")
- ,("globe_with_meridians","\127760")
- ,("goat","\128016")
- ,("golf","\9971")
- ,("grapes","\127815")
- ,("green_apple","\127823")
- ,("green_book","\128215")
- ,("green_heart","\128154")
- ,("grey_exclamation","\10069")
- ,("grey_question","\10068")
- ,("grimacing","\128556")
- ,("grin","\128513")
- ,("grinning","\128512")
- ,("guardsman","\128130")
- ,("guitar","\127928")
- ,("gun","\128299")
- ,("haircut","\128135")
- ,("hamburger","\127828")
- ,("hammer","\128296")
- ,("hamster","\128057")
- ,("hand","\9995")
- ,("handbag","\128092")
- ,("hankey","\128169")
- ,("hash","#\65039\8419")
- ,("hatched_chick","\128037")
- ,("hatching_chick","\128035")
- ,("headphones","\127911")
- ,("hear_no_evil","\128585")
- ,("heart","\10084\65039")
- ,("heart_decoration","\128159")
- ,("heart_eyes","\128525")
- ,("heart_eyes_cat","\128571")
- ,("heartbeat","\128147")
- ,("heartpulse","\128151")
- ,("hearts","\9829\65039")
- ,("heavy_check_mark","\10004\65039")
- ,("heavy_division_sign","\10135")
- ,("heavy_dollar_sign","\128178")
- ,("heavy_exclamation_mark","\10071")
- ,("heavy_minus_sign","\10134")
- ,("heavy_multiplication_x","\10006\65039")
- ,("heavy_plus_sign","\10133")
- ,("helicopter","\128641")
- ,("herb","\127807")
- ,("hibiscus","\127802")
- ,("high_brightness","\128262")
- ,("high_heel","\128096")
- ,("hocho","\128298")
- ,("honey_pot","\127855")
- ,("honeybee","\128029")
- ,("horse","\128052")
- ,("horse_racing","\127943")
- ,("hospital","\127973")
- ,("hotel","\127976")
- ,("hotsprings","\9832\65039")
- ,("hourglass","\8987")
- ,("hourglass_flowing_sand","\9203")
- ,("house","\127968")
- ,("house_with_garden","\127969")
- ,("hushed","\128559")
- ,("ice_cream","\127848")
- ,("icecream","\127846")
- ,("id","\127380")
- ,("ideograph_advantage","\127568")
- ,("imp","\128127")
- ,("inbox_tray","\128229")
- ,("incoming_envelope","\128232")
- ,("information_desk_person","\128129")
- ,("information_source","\8505\65039")
- ,("innocent","\128519")
- ,("interrobang","\8265\65039")
- ,("iphone","\128241")
- ,("it","\127470\127481")
- ,("izakaya_lantern","\127982")
- ,("jack_o_lantern","\127875")
- ,("japan","\128510")
- ,("japanese_castle","\127983")
- ,("japanese_goblin","\128122")
- ,("japanese_ogre","\128121")
- ,("jeans","\128086")
- ,("joy","\128514")
- ,("joy_cat","\128569")
- ,("jp","\127471\127477")
- ,("key","\128273")
- ,("keycap_ten","\128287")
- ,("kimono","\128088")
- ,("kiss","\128139")
- ,("kissing","\128535")
- ,("kissing_cat","\128573")
- ,("kissing_closed_eyes","\128538")
- ,("kissing_heart","\128536")
- ,("kissing_smiling_eyes","\128537")
- ,("knife","\128298")
- ,("koala","\128040")
- ,("koko","\127489")
- ,("kr","\127472\127479")
- ,("lantern","\127982")
- ,("large_blue_circle","\128309")
- ,("large_blue_diamond","\128311")
- ,("large_orange_diamond","\128310")
- ,("last_quarter_moon","\127767")
- ,("last_quarter_moon_with_face","\127772")
- ,("laughing","\128518")
- ,("leaves","\127811")
- ,("ledger","\128210")
- ,("left_luggage","\128709")
- ,("left_right_arrow","\8596\65039")
- ,("leftwards_arrow_with_hook","\8617\65039")
- ,("lemon","\127819")
- ,("leo","\9804")
- ,("leopard","\128006")
- ,("libra","\9806")
- ,("light_rail","\128648")
- ,("link","\128279")
- ,("lips","\128068")
- ,("lipstick","\128132")
- ,("lock","\128274")
- ,("lock_with_ink_pen","\128271")
- ,("lollipop","\127853")
- ,("loop","\10175")
- ,("loud_sound","\128266")
- ,("loudspeaker","\128226")
- ,("love_hotel","\127977")
- ,("love_letter","\128140")
- ,("low_brightness","\128261")
- ,("m","\9410\65039")
- ,("mag","\128269")
- ,("mag_right","\128270")
- ,("mahjong","\126980")
- ,("mailbox","\128235")
- ,("mailbox_closed","\128234")
- ,("mailbox_with_mail","\128236")
- ,("mailbox_with_no_mail","\128237")
- ,("man","\128104")
- ,("man_with_gua_pi_mao","\128114")
- ,("man_with_turban","\128115")
- ,("mans_shoe","\128094")
- ,("maple_leaf","\127809")
- ,("mask","\128567")
- ,("massage","\128134")
- ,("meat_on_bone","\127830")
- ,("mega","\128227")
- ,("melon","\127816")
- ,("memo","\128221")
- ,("mens","\128697")
- ,("metro","\128647")
- ,("microphone","\127908")
- ,("microscope","\128300")
- ,("milky_way","\127756")
- ,("minibus","\128656")
- ,("minidisc","\128189")
- ,("mobile_phone_off","\128244")
- ,("money_with_wings","\128184")
- ,("moneybag","\128176")
- ,("monkey","\128018")
- ,("monkey_face","\128053")
- ,("monorail","\128669")
- ,("moon","\127764")
- ,("mortar_board","\127891")
- ,("mount_fuji","\128507")
- ,("mountain_bicyclist","\128693")
- ,("mountain_cableway","\128672")
- ,("mountain_railway","\128670")
- ,("mouse","\128045")
- ,("mouse2","\128001")
- ,("movie_camera","\127909")
- ,("moyai","\128511")
- ,("muscle","\128170")
- ,("mushroom","\127812")
- ,("musical_keyboard","\127929")
- ,("musical_note","\127925")
- ,("musical_score","\127932")
- ,("mute","\128263")
- ,("nail_care","\128133")
- ,("name_badge","\128219")
- ,("necktie","\128084")
- ,("negative_squared_cross_mark","\10062")
- ,("neutral_face","\128528")
- ,("new","\127381")
- ,("new_moon","\127761")
- ,("new_moon_with_face","\127770")
- ,("newspaper","\128240")
- ,("ng","\127382")
- ,("night_with_stars","\127747")
- ,("nine","9\65039\8419")
- ,("no_bell","\128277")
- ,("no_bicycles","\128691")
- ,("no_entry","\9940")
- ,("no_entry_sign","\128683")
- ,("no_good","\128581")
- ,("no_mobile_phones","\128245")
- ,("no_mouth","\128566")
- ,("no_pedestrians","\128695")
- ,("no_smoking","\128685")
- ,("non-potable_water","\128689")
- ,("nose","\128067")
- ,("notebook","\128211")
- ,("notebook_with_decorative_cover","\128212")
- ,("notes","\127926")
- ,("nut_and_bolt","\128297")
- ,("o","\11093")
- ,("o2","\127358\65039")
- ,("ocean","\127754")
- ,("octopus","\128025")
- ,("oden","\127842")
- ,("office","\127970")
- ,("ok","\127383")
- ,("ok_hand","\128076")
- ,("ok_woman","\128582")
- ,("older_man","\128116")
- ,("older_woman","\128117")
- ,("on","\128283")
- ,("oncoming_automobile","\128664")
- ,("oncoming_bus","\128653")
- ,("oncoming_police_car","\128660")
- ,("oncoming_taxi","\128662")
- ,("one","1\65039\8419")
- ,("open_book","\128214")
- ,("open_file_folder","\128194")
- ,("open_hands","\128080")
- ,("open_mouth","\128558")
- ,("ophiuchus","\9934")
- ,("orange_book","\128217")
- ,("outbox_tray","\128228")
- ,("ox","\128002")
- ,("package","\128230")
- ,("page_facing_up","\128196")
- ,("page_with_curl","\128195")
- ,("pager","\128223")
- ,("palm_tree","\127796")
- ,("panda_face","\128060")
- ,("paperclip","\128206")
- ,("parking","\127359\65039")
- ,("part_alternation_mark","\12349\65039")
- ,("partly_sunny","\9925")
- ,("passport_control","\128706")
- ,("paw_prints","\128062")
- ,("peach","\127825")
- ,("pear","\127824")
- ,("pencil","\128221")
- ,("pencil2","\9999\65039")
- ,("penguin","\128039")
- ,("pensive","\128532")
- ,("performing_arts","\127917")
- ,("persevere","\128547")
- ,("person_frowning","\128589")
- ,("person_with_blond_hair","\128113")
- ,("person_with_pouting_face","\128590")
- ,("phone","\9742\65039")
- ,("pig","\128055")
- ,("pig2","\128022")
- ,("pig_nose","\128061")
- ,("pill","\128138")
- ,("pineapple","\127821")
- ,("pisces","\9811")
- ,("pizza","\127829")
- ,("point_down","\128071")
- ,("point_left","\128072")
- ,("point_right","\128073")
- ,("point_up","\9757\65039")
- ,("point_up_2","\128070")
- ,("police_car","\128659")
- ,("poodle","\128041")
- ,("poop","\128169")
- ,("post_office","\127971")
- ,("postal_horn","\128239")
- ,("postbox","\128238")
- ,("potable_water","\128688")
- ,("pouch","\128093")
- ,("poultry_leg","\127831")
- ,("pound","\128183")
- ,("pouting_cat","\128574")
- ,("pray","\128591")
- ,("princess","\128120")
- ,("punch","\128074")
- ,("purple_heart","\128156")
- ,("purse","\128091")
- ,("pushpin","\128204")
- ,("put_litter_in_its_place","\128686")
- ,("question","\10067")
- ,("rabbit","\128048")
- ,("rabbit2","\128007")
- ,("racehorse","\128014")
- ,("radio","\128251")
- ,("radio_button","\128280")
- ,("rage","\128545")
- ,("railway_car","\128643")
- ,("rainbow","\127752")
- ,("raised_hand","\9995")
- ,("raised_hands","\128588")
- ,("raising_hand","\128587")
- ,("ram","\128015")
- ,("ramen","\127836")
- ,("rat","\128000")
- ,("recycle","\9851\65039")
- ,("red_car","\128663")
- ,("red_circle","\128308")
- ,("registered","\174\65039")
- ,("relaxed","\9786\65039")
- ,("relieved","\128524")
- ,("repeat","\128257")
- ,("repeat_one","\128258")
- ,("restroom","\128699")
- ,("revolving_hearts","\128158")
- ,("rewind","\9194")
- ,("ribbon","\127872")
- ,("rice","\127834")
- ,("rice_ball","\127833")
- ,("rice_cracker","\127832")
- ,("rice_scene","\127889")
- ,("ring","\128141")
- ,("rocket","\128640")
- ,("roller_coaster","\127906")
- ,("rooster","\128019")
- ,("rose","\127801")
- ,("rotating_light","\128680")
- ,("round_pushpin","\128205")
- ,("rowboat","\128675")
- ,("ru","\127479\127482")
- ,("rugby_football","\127945")
- ,("runner","\127939")
- ,("running","\127939")
- ,("running_shirt_with_sash","\127933")
- ,("sa","\127490\65039")
- ,("sagittarius","\9808")
- ,("sailboat","\9973")
- ,("sake","\127862")
- ,("sandal","\128097")
- ,("santa","\127877")
- ,("satellite","\128225")
- ,("satisfied","\128518")
- ,("saxophone","\127927")
- ,("school","\127979")
- ,("school_satchel","\127890")
- ,("scissors","\9986\65039")
- ,("scorpius","\9807")
- ,("scream","\128561")
- ,("scream_cat","\128576")
- ,("scroll","\128220")
- ,("seat","\128186")
- ,("secret","\12953\65039")
- ,("see_no_evil","\128584")
- ,("seedling","\127793")
- ,("seven","7\65039\8419")
- ,("shaved_ice","\127847")
- ,("sheep","\128017")
- ,("shell","\128026")
- ,("ship","\128674")
- ,("shirt","\128085")
- ,("shit","\128169")
- ,("shoe","\128094")
- ,("shower","\128703")
- ,("signal_strength","\128246")
- ,("six","6\65039\8419")
- ,("six_pointed_star","\128303")
- ,("ski","\127935")
- ,("skull","\128128")
- ,("sleeping","\128564")
- ,("sleepy","\128554")
- ,("slot_machine","\127920")
- ,("small_blue_diamond","\128313")
- ,("small_orange_diamond","\128312")
- ,("small_red_triangle","\128314")
- ,("small_red_triangle_down","\128315")
- ,("smile","\128516")
- ,("smile_cat","\128568")
- ,("smiley","\128515")
- ,("smiley_cat","\128570")
- ,("smiling_imp","\128520")
- ,("smirk","\128527")
- ,("smirk_cat","\128572")
- ,("smoking","\128684")
- ,("snail","\128012")
- ,("snake","\128013")
- ,("snowboarder","\127938")
- ,("snowflake","\10052\65039")
- ,("snowman","\9924")
- ,("sob","\128557")
- ,("soccer","\9917")
- ,("soon","\128284")
- ,("sos","\127384")
- ,("sound","\128265")
- ,("space_invader","\128126")
- ,("spades","\9824\65039")
- ,("spaghetti","\127837")
- ,("sparkle","\10055\65039")
- ,("sparkler","\127879")
- ,("sparkles","\10024")
- ,("sparkling_heart","\128150")
- ,("speak_no_evil","\128586")
- ,("speaker","\128264")
- ,("speech_balloon","\128172")
- ,("speedboat","\128676")
- ,("star","\11088")
- ,("star2","\127775")
- ,("stars","\127776")
- ,("station","\128649")
- ,("statue_of_liberty","\128509")
- ,("steam_locomotive","\128642")
- ,("stew","\127858")
- ,("straight_ruler","\128207")
- ,("strawberry","\127827")
- ,("stuck_out_tongue","\128539")
- ,("stuck_out_tongue_closed_eyes","\128541")
- ,("stuck_out_tongue_winking_eye","\128540")
- ,("sun_with_face","\127774")
- ,("sunflower","\127803")
- ,("sunglasses","\128526")
- ,("sunny","\9728\65039")
- ,("sunrise","\127749")
- ,("sunrise_over_mountains","\127748")
- ,("surfer","\127940")
- ,("sushi","\127843")
- ,("suspension_railway","\128671")
- ,("sweat","\128531")
- ,("sweat_drops","\128166")
- ,("sweat_smile","\128517")
- ,("sweet_potato","\127840")
- ,("swimmer","\127946")
- ,("symbols","\128291")
- ,("syringe","\128137")
- ,("tada","\127881")
- ,("tanabata_tree","\127883")
- ,("tangerine","\127818")
- ,("taurus","\9801")
- ,("taxi","\128661")
- ,("tea","\127861")
- ,("telephone","\9742\65039")
- ,("telephone_receiver","\128222")
- ,("telescope","\128301")
- ,("tennis","\127934")
- ,("tent","\9978")
- ,("thought_balloon","\128173")
- ,("three","3\65039\8419")
- ,("thumbsdown","\128078")
- ,("thumbsup","\128077")
- ,("ticket","\127915")
- ,("tiger","\128047")
- ,("tiger2","\128005")
- ,("tired_face","\128555")
- ,("tm","\8482\65039")
- ,("toilet","\128701")
- ,("tokyo_tower","\128508")
- ,("tomato","\127813")
- ,("tongue","\128069")
- ,("top","\128285")
- ,("tophat","\127913")
- ,("tractor","\128668")
- ,("traffic_light","\128677")
- ,("train","\128651")
- ,("train2","\128646")
- ,("tram","\128650")
- ,("triangular_flag_on_post","\128681")
- ,("triangular_ruler","\128208")
- ,("trident","\128305")
- ,("triumph","\128548")
- ,("trolleybus","\128654")
- ,("trophy","\127942")
- ,("tropical_drink","\127865")
- ,("tropical_fish","\128032")
- ,("truck","\128666")
- ,("trumpet","\127930")
- ,("tshirt","\128085")
- ,("tulip","\127799")
- ,("turtle","\128034")
- ,("tv","\128250")
- ,("twisted_rightwards_arrows","\128256")
- ,("two","2\65039\8419")
- ,("two_hearts","\128149")
- ,("two_men_holding_hands","\128108")
- ,("two_women_holding_hands","\128109")
- ,("u5272","\127545")
- ,("u5408","\127540")
- ,("u55b6","\127546")
- ,("u6307","\127535")
- ,("u6708","\127543\65039")
- ,("u6709","\127542")
- ,("u6e80","\127541")
- ,("u7121","\127514")
- ,("u7533","\127544")
- ,("u7981","\127538")
- ,("u7a7a","\127539")
- ,("uk","\127468\127463")
- ,("umbrella","\9748")
- ,("unamused","\128530")
- ,("underage","\128286")
- ,("unlock","\128275")
- ,("up","\127385")
- ,("us","\127482\127480")
- ,("v","\9996\65039")
- ,("vertical_traffic_light","\128678")
- ,("vhs","\128252")
- ,("vibration_mode","\128243")
- ,("video_camera","\128249")
- ,("video_game","\127918")
- ,("violin","\127931")
- ,("virgo","\9805")
- ,("volcano","\127755")
- ,("vs","\127386")
- ,("walking","\128694")
- ,("waning_crescent_moon","\127768")
- ,("waning_gibbous_moon","\127766")
- ,("warning","\9888\65039")
- ,("watch","\8986")
- ,("water_buffalo","\128003")
- ,("watermelon","\127817")
- ,("wave","\128075")
- ,("wavy_dash","\12336\65039")
- ,("waxing_crescent_moon","\127762")
- ,("waxing_gibbous_moon","\127764")
- ,("wc","\128702")
- ,("weary","\128553")
- ,("wedding","\128146")
- ,("whale","\128051")
- ,("whale2","\128011")
- ,("wheelchair","\9855")
- ,("white_check_mark","\9989")
- ,("white_circle","\9898")
- ,("white_flower","\128174")
- ,("white_large_square","\11036")
- ,("white_medium_small_square","\9725")
- ,("white_medium_square","\9723\65039")
- ,("white_small_square","\9643\65039")
- ,("white_square_button","\128307")
- ,("wind_chime","\127888")
- ,("wine_glass","\127863")
- ,("wink","\128521")
- ,("wolf","\128058")
- ,("woman","\128105")
- ,("womans_clothes","\128090")
- ,("womans_hat","\128082")
- ,("womens","\128698")
- ,("worried","\128543")
- ,("wrench","\128295")
- ,("x","\10060")
- ,("yellow_heart","\128155")
- ,("yen","\128180")
- ,("yum","\128523")
- ,("zap","\9889")
- ,("zero","0\65039\8419")
- ,("zzz","\128164")
- ]
-
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
deleted file mode 100644
index 65f912c88..000000000
--- a/src/Text/Pandoc/Error.hs
+++ /dev/null
@@ -1,76 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-{- |
- Module : Text.Pandoc.Error
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-This module provides a standard way to deal with possible errors encounted
-during parsing.
-
--}
-module Text.Pandoc.Error (
- PandocError(..),
- handleError) where
-
-import Text.Parsec.Error
-import Text.Parsec.Pos hiding (Line)
-import Data.Generics (Typeable)
-import GHC.Generics (Generic)
-import Control.Exception (Exception)
-import Text.Pandoc.Shared (err)
-
-type Input = String
-
-data PandocError = PandocFileReadError FilePath
- | PandocShouldNeverHappenError String
- | PandocSomeError String
- | PandocParseError String
- | PandocParsecError Input ParseError
- | PandocMakePDFError String
- deriving (Show, Typeable, Generic)
-
-instance Exception PandocError
-
--- | Handle PandocError by exiting with an error message.
-handleError :: Either PandocError a -> IO a
-handleError (Right r) = return r
-handleError (Left e) =
- case e of
- PandocFileReadError fp -> err 61 $ "problem reading " ++ fp
- PandocShouldNeverHappenError s -> err 62 s
- PandocSomeError s -> err 63 s
- PandocParseError s -> err 64 s
- PandocParsecError input err' ->
- let errPos = errorPos err'
- errLine = sourceLine errPos
- errColumn = sourceColumn errPos
- ls = lines input ++ [""]
- errorInFile = if length ls > errLine - 1
- then concat ["\n", (ls !! (errLine - 1))
- ,"\n", replicate (errColumn - 1) ' '
- ,"^"]
- else ""
- in err 65 $ "\nError at " ++ show err' ++ errorInFile
- PandocMakePDFError s -> err 65 s
-
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
deleted file mode 100644
index d5e59e8e1..000000000
--- a/src/Text/Pandoc/Extensions.hs
+++ /dev/null
@@ -1,267 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Extensions
- Copyright : Copyright (C) 2012-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Data structures and functions for representing markup extensions.
--}
-module Text.Pandoc.Extensions ( Extension(..)
- , Extensions
- , emptyExtensions
- , extensionsFromList
- , extensionEnabled
- , enableExtension
- , disableExtension
- , pandocExtensions
- , plainExtensions
- , strictExtensions
- , phpMarkdownExtraExtensions
- , githubMarkdownExtensions
- , multimarkdownExtensions )
-where
-import Data.Bits (testBit, setBit, clearBit)
-import Data.Data (Data)
-import Data.Typeable (Typeable)
-import GHC.Generics (Generic)
-
-newtype Extensions = Extensions Integer
- deriving (Show, Read, Eq, Ord, Data, Typeable, Generic)
-
-extensionsFromList :: [Extension] -> Extensions
-extensionsFromList = foldr enableExtension emptyExtensions
-
-emptyExtensions :: Extensions
-emptyExtensions = Extensions 0
-
-extensionEnabled :: Extension -> Extensions -> Bool
-extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)
-
-enableExtension :: Extension -> Extensions -> Extensions
-enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))
-
-disableExtension :: Extension -> Extensions -> Extensions
-disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))
-
--- | Individually selectable syntax extensions.
-data Extension =
- Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
- | Ext_inline_notes -- ^ Pandoc-style inline notes
- | Ext_pandoc_title_block -- ^ Pandoc title block
- | Ext_yaml_metadata_block -- ^ YAML metadata block
- | Ext_mmd_title_block -- ^ Multimarkdown metadata block
- | Ext_table_captions -- ^ Pandoc-style table captions
- | Ext_implicit_figures -- ^ A paragraph with just an image is a figure
- | Ext_simple_tables -- ^ Pandoc-style simple tables
- | Ext_multiline_tables -- ^ Pandoc-style multiline tables
- | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
- | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
- | Ext_citations -- ^ Pandoc/citeproc citations
- | Ext_raw_tex -- ^ Allow raw TeX (other than math)
- | Ext_raw_html -- ^ Allow raw HTML
- | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
- | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
- | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
- | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
- | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
- | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
- | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
- | Ext_inline_code_attributes -- ^ Allow attributes on inline code
- | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
- | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
- | Ext_native_spans -- ^ Use Span inlines for contents of <span>
- | Ext_bracketed_spans -- ^ Bracketed spans with attributes
- | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
- -- iff container has attribute 'markdown'
- | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
- | Ext_link_attributes -- ^ link and image attributes
- | Ext_mmd_link_attributes -- ^ MMD style reference link attributes
- | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
- | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
- | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
- | Ext_startnum -- ^ Make start number of ordered list significant
- | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
- | Ext_compact_definition_lists -- ^ Definition lists without
- -- space between items, and disallow laziness
- | Ext_example_lists -- ^ Markdown-style numbered examples
- | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
- | Ext_angle_brackets_escapable -- ^ Make < and > escapable
- | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
- | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
- | Ext_blank_before_header -- ^ Require blank line before a header
- | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
- | Ext_superscript -- ^ Superscript using ^this^ syntax
- | Ext_subscript -- ^ Subscript using ~this~ syntax
- | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
- | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
- | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
- -- East Asian wide characters
- | Ext_literate_haskell -- ^ Enable literate Haskell conventions
- | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
- | Ext_emoji -- ^ Support emoji like :smile:
- | Ext_auto_identifiers -- ^ Automatic identifiers for headers
- | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
- | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
- | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
- | Ext_implicit_header_references -- ^ Implicit reference links for headers
- | Ext_line_blocks -- ^ RST style line blocks
- | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
- | Ext_shortcut_reference_links -- ^ Shortcut reference links
- | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
- | Ext_old_dashes -- ^ -- = em, - before number = en
- deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
-
-pandocExtensions :: Extensions
-pandocExtensions = extensionsFromList
- [ Ext_footnotes
- , Ext_inline_notes
- , Ext_pandoc_title_block
- , Ext_yaml_metadata_block
- , Ext_table_captions
- , Ext_implicit_figures
- , Ext_simple_tables
- , Ext_multiline_tables
- , Ext_grid_tables
- , Ext_pipe_tables
- , Ext_citations
- , Ext_raw_tex
- , Ext_raw_html
- , Ext_tex_math_dollars
- , Ext_latex_macros
- , Ext_fenced_code_blocks
- , Ext_fenced_code_attributes
- , Ext_backtick_code_blocks
- , Ext_inline_code_attributes
- , Ext_markdown_in_html_blocks
- , Ext_native_divs
- , Ext_native_spans
- , Ext_bracketed_spans
- , Ext_escaped_line_breaks
- , Ext_fancy_lists
- , Ext_startnum
- , Ext_definition_lists
- , Ext_example_lists
- , Ext_all_symbols_escapable
- , Ext_intraword_underscores
- , Ext_blank_before_blockquote
- , Ext_blank_before_header
- , Ext_strikeout
- , Ext_superscript
- , Ext_subscript
- , Ext_auto_identifiers
- , Ext_header_attributes
- , Ext_link_attributes
- , Ext_implicit_header_references
- , Ext_line_blocks
- , Ext_shortcut_reference_links
- , Ext_smart
- ]
-
-plainExtensions :: Extensions
-plainExtensions = extensionsFromList
- [ Ext_table_captions
- , Ext_implicit_figures
- , Ext_simple_tables
- , Ext_multiline_tables
- , Ext_grid_tables
- , Ext_latex_macros
- , Ext_fancy_lists
- , Ext_startnum
- , Ext_definition_lists
- , Ext_example_lists
- , Ext_intraword_underscores
- , Ext_blank_before_blockquote
- , Ext_blank_before_header
- , Ext_strikeout
- ]
-
-phpMarkdownExtraExtensions :: Extensions
-phpMarkdownExtraExtensions = extensionsFromList
- [ Ext_footnotes
- , Ext_pipe_tables
- , Ext_raw_html
- , Ext_markdown_attribute
- , Ext_fenced_code_blocks
- , Ext_definition_lists
- , Ext_intraword_underscores
- , Ext_header_attributes
- , Ext_link_attributes
- , Ext_abbreviations
- , Ext_shortcut_reference_links
- ]
-
-githubMarkdownExtensions :: Extensions
-githubMarkdownExtensions = extensionsFromList
- [ Ext_angle_brackets_escapable
- , Ext_pipe_tables
- , Ext_raw_html
- , Ext_fenced_code_blocks
- , Ext_auto_identifiers
- , Ext_ascii_identifiers
- , Ext_backtick_code_blocks
- , Ext_autolink_bare_uris
- , Ext_intraword_underscores
- , Ext_strikeout
- , Ext_hard_line_breaks
- , Ext_emoji
- , Ext_lists_without_preceding_blankline
- , Ext_shortcut_reference_links
- ]
-
-multimarkdownExtensions :: Extensions
-multimarkdownExtensions = extensionsFromList
- [ Ext_pipe_tables
- , Ext_raw_html
- , Ext_markdown_attribute
- , Ext_mmd_link_attributes
- -- , Ext_raw_tex
- -- Note: MMD's raw TeX syntax requires raw TeX to be
- -- enclosed in HTML comment
- , Ext_tex_math_double_backslash
- , Ext_intraword_underscores
- , Ext_mmd_title_block
- , Ext_footnotes
- , Ext_definition_lists
- , Ext_all_symbols_escapable
- , Ext_implicit_header_references
- , Ext_auto_identifiers
- , Ext_mmd_header_identifiers
- , Ext_implicit_figures
- -- Note: MMD's syntax for superscripts and subscripts
- -- is a bit more permissive than pandoc's, allowing
- -- e^2 and a~1 instead of e^2^ and a~1~, so even with
- -- these options we don't have full support for MMD
- -- superscripts and subscripts, but there's no reason
- -- not to include these:
- , Ext_superscript
- , Ext_subscript
- ]
-
-strictExtensions :: Extensions
-strictExtensions = extensionsFromList
- [ Ext_raw_html
- , Ext_shortcut_reference_links
- ]
-
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
deleted file mode 100644
index df060915c..000000000
--- a/src/Text/Pandoc/Highlighting.hs
+++ /dev/null
@@ -1,223 +0,0 @@
-{-
-Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Highlighting
- Copyright : Copyright (C) 2008-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Exports functions for syntax highlighting.
--}
-
-module Text.Pandoc.Highlighting ( highlightingStyles
- , languages
- , languagesByExtension
- , highlight
- , formatLaTeXInline
- , formatLaTeXBlock
- , styleToLaTeX
- , formatHtmlInline
- , formatHtmlBlock
- , styleToCss
- , pygments
- , espresso
- , zenburn
- , tango
- , kate
- , monochrome
- , haddock
- , Style
- , fromListingsLanguage
- , toListingsLanguage
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared (safeRead)
-import Skylighting
-import Data.Maybe (fromMaybe)
-import Data.Char (toLower)
-import qualified Data.Map as M
-import Control.Monad
-import qualified Data.Text as T
-
-highlightingStyles :: [(String, Style)]
-highlightingStyles =
- [("pygments", pygments),
- ("tango", tango),
- ("espresso", espresso),
- ("zenburn", zenburn),
- ("kate", kate),
- ("monochrome", monochrome),
- ("breezedark", breezeDark),
- ("haddock", haddock)]
-
-languages :: [String]
-languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap]
-
-languagesByExtension :: String -> [String]
-languagesByExtension ext =
- [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext]
-
-highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
- -> Attr -- ^ Attributes of the CodeBlock
- -> String -- ^ Raw contents of the CodeBlock
- -> Maybe a -- ^ Maybe the formatted result
-highlight formatter (_, classes, keyvals) rawCode =
- let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals))
- fmtOpts = defaultFormatOpts{
- startNumber = firstNum,
- numberLines = any (`elem`
- ["number","numberLines", "number-lines"]) classes }
- tokenizeOpts = TokenizerConfig{ syntaxMap = defaultSyntaxMap
- , traceOutput = False }
- classes' = map T.pack classes
- rawCode' = T.pack rawCode
- in case msum (map (\l -> lookupSyntax l defaultSyntaxMap) classes') of
- Nothing
- | numberLines fmtOpts -> Just
- $ formatter fmtOpts{ codeClasses = [],
- containerClasses = classes' }
- $ map (\ln -> [(NormalTok, ln)]) $ T.lines rawCode'
- | otherwise -> Nothing
- Just syntax ->
- case tokenize tokenizeOpts syntax rawCode' of
- Right slines -> Just $
- formatter fmtOpts{ codeClasses =
- [T.toLower (sShortname syntax)],
- containerClasses = classes' } slines
- Left _ -> Nothing
-
--- Functions for correlating latex listings package's language names
--- with skylighting language names:
-
-langToListingsMap :: M.Map String String
-langToListingsMap = M.fromList langsList
-
-listingsToLangMap :: M.Map String String
-listingsToLangMap = M.fromList $ map switch langsList
- where switch (a,b) = (b,a)
-
-langsList :: [(String, String)]
-langsList =
- [("abap","ABAP"),
- ("acm","ACM"),
- ("acmscript","ACMscript"),
- ("acsl","ACSL"),
- ("ada","Ada"),
- ("algol","Algol"),
- ("ant","Ant"),
- ("assembler","Assembler"),
- ("gnuassembler","Assembler"),
- ("awk","Awk"),
- ("bash","bash"),
- ("monobasic","Basic"),
- ("purebasic","Basic"),
- ("c","C"),
- ("cpp","C++"),
- ("c++","C++"),
- ("ocaml","Caml"),
- ("cil","CIL"),
- ("clean","Clean"),
- ("cobol","Cobol"),
- ("comal80","Comal80"),
- ("command.com","command.com"),
- ("comsol","Comsol"),
- ("csh","csh"),
- ("delphi","Delphi"),
- ("elan","Elan"),
- ("erlang","erlang"),
- ("euphoria","Euphoria"),
- ("fortran","Fortran"),
- ("gap","GAP"),
- ("gcl","GCL"),
- ("gnuplot","Gnuplot"),
- ("hansl","hansl"),
- ("haskell","Haskell"),
- ("html","HTML"),
- ("idl","IDL"),
- ("inform","inform"),
- ("java","Java"),
- ("jvmis","JVMIS"),
- ("ksh","ksh"),
- ("lingo","Lingo"),
- ("lisp","Lisp"),
- ("commonlisp","Lisp"),
- ("llvm","LLVM"),
- ("logo","Logo"),
- ("lua","Lua"),
- ("make","make"),
- ("makefile","make"),
- ("mathematica","Mathematica"),
- ("matlab","Matlab"),
- ("mercury","Mercury"),
- ("metapost","MetaPost"),
- ("miranda","Miranda"),
- ("mizar","Mizar"),
- ("ml","ML"),
- ("modula2","Modula-2"),
- ("mupad","MuPAD"),
- ("nastran","NASTRAN"),
- ("oberon2","Oberon-2"),
- ("ocl","OCL"),
- ("octave","Octave"),
- ("oz","Oz"),
- ("pascal","Pascal"),
- ("perl","Perl"),
- ("php","PHP"),
- ("pli","PL/I"),
- ("plasm","Plasm"),
- ("postscript","PostScript"),
- ("pov","POV"),
- ("prolog","Prolog"),
- ("promela","Promela"),
- ("pstricks","PSTricks"),
- ("python","Python"),
- ("r","R"),
- ("reduce","Reduce"),
- ("rexx","Rexx"),
- ("rsl","RSL"),
- ("ruby","Ruby"),
- ("s","S"),
- ("sas","SAS"),
- ("scala","Scala"),
- ("scilab","Scilab"),
- ("sh","sh"),
- ("shelxl","SHELXL"),
- ("simula","Simula"),
- ("sparql","SPARQL"),
- ("sql","SQL"),
- ("tcl","tcl"),
- ("tex","TeX"),
- ("latex","TeX"),
- ("vbscript","VBScript"),
- ("verilog","Verilog"),
- ("vhdl","VHDL"),
- ("vrml","VRML"),
- ("xml","XML"),
- ("xslt","XSLT")]
-
--- | Determine listings language name from skylighting language name.
-toListingsLanguage :: String -> Maybe String
-toListingsLanguage lang = M.lookup (map toLower lang) langToListingsMap
-
--- | Determine skylighting language name from listings language name.
-fromListingsLanguage :: String -> Maybe String
-fromListingsLanguage lang = M.lookup lang listingsToLangMap
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
deleted file mode 100644
index cc22c06ca..000000000
--- a/src/Text/Pandoc/ImageSize.hs
+++ /dev/null
@@ -1,547 +0,0 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
-{-# OPTIONS_GHC -fno-warn-type-defaults #-}
-{-
- Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc., 59
- Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
-Module : Text.Pandoc.ImageSize
-Copyright : Copyright (C) 2011-2016 John MacFarlane
-License : GNU GPL, version 2 or above
-
-Maintainer : John MacFarlane <jgm@berkeley.edu>
-Stability : alpha
-Portability : portable
-
-Functions for determining the size of a PNG, JPEG, or GIF image.
--}
-module Text.Pandoc.ImageSize ( ImageType(..)
- , imageType
- , imageSize
- , sizeInPixels
- , sizeInPoints
- , desiredSizeInPoints
- , Dimension(..)
- , Direction(..)
- , dimension
- , inInch
- , inPoints
- , numUnit
- , showInInch
- , showInPixel
- , showFl
- ) where
-import Data.ByteString (ByteString, unpack)
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy as BL
-import Data.Char (isDigit)
-import Control.Monad
-import Data.Bits
-import Data.Binary
-import Data.Binary.Get
-import Text.Pandoc.Shared (safeRead)
-import Data.Default (Default)
-import Numeric (showFFloat)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import qualified Data.Map as M
-import Control.Monad.Except
-import Data.Maybe (fromMaybe)
-
--- quick and dirty functions to get image sizes
--- algorithms borrowed from wwwis.pl
-
-data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show
-data Direction = Width | Height
-instance Show Direction where
- show Width = "width"
- show Height = "height"
-
-data Dimension = Pixel Integer
- | Centimeter Double
- | Inch Double
- | Percent Double
-instance Show Dimension where
- show (Pixel a) = show a ++ "px"
- show (Centimeter a) = showFl a ++ "cm"
- show (Inch a) = showFl a ++ "in"
- show (Percent a) = show a ++ "%"
-
-data ImageSize = ImageSize{
- pxX :: Integer
- , pxY :: Integer
- , dpiX :: Integer
- , dpiY :: Integer
- } deriving (Read, Show, Eq)
-instance Default ImageSize where
- def = ImageSize 300 200 72 72
-
-showFl :: (RealFloat a) => a -> String
-showFl a = showFFloat (Just 5) a ""
-
-imageType :: ByteString -> Maybe ImageType
-imageType img = case B.take 4 img of
- "\x89\x50\x4e\x47" -> return Png
- "\x47\x49\x46\x38" -> return Gif
- "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF
- "\xff\xd8\xff\xe1" -> return Jpeg -- Exif
- "%PDF" -> return Pdf
- "%!PS"
- | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
- -> return Eps
- _ -> mzero
-
-imageSize :: ByteString -> Either String ImageSize
-imageSize img =
- case imageType img of
- Just Png -> mbToEither "could not determine PNG size" $ pngSize img
- Just Gif -> mbToEither "could not determine GIF size" $ gifSize img
- Just Jpeg -> jpegSize img
- Just Eps -> mbToEither "could not determine EPS size" $ epsSize img
- Just Pdf -> Left "could not determine PDF size" -- TODO
- Nothing -> Left "could not determine image type"
- where mbToEither msg Nothing = Left msg
- mbToEither _ (Just x) = Right x
-
-defaultSize :: (Integer, Integer)
-defaultSize = (72, 72)
-
-sizeInPixels :: ImageSize -> (Integer, Integer)
-sizeInPixels s = (pxX s, pxY s)
-
--- | Calculate (height, width) in points using the image file's dpi metadata,
--- using 72 Points == 1 Inch.
-sizeInPoints :: ImageSize -> (Double, Double)
-sizeInPoints s = (pxXf * 72 / dpiXf, pxYf * 72 / dpiYf)
- where
- pxXf = fromIntegral $ pxX s
- pxYf = fromIntegral $ pxY s
- dpiXf = fromIntegral $ dpiX s
- dpiYf = fromIntegral $ dpiY s
-
--- | Calculate (height, width) in points, considering the desired dimensions in the
--- attribute, while falling back on the image file's dpi metadata if no dimensions
--- are specified in the attribute (or only dimensions in percentages).
-desiredSizeInPoints :: WriterOptions -> Attr -> ImageSize -> (Double, Double)
-desiredSizeInPoints opts attr s =
- case (getDim Width, getDim Height) of
- (Just w, Just h) -> (w, h)
- (Just w, Nothing) -> (w, w / ratio)
- (Nothing, Just h) -> (h * ratio, h)
- (Nothing, Nothing) -> sizeInPoints s
- where
- ratio = fromIntegral (pxX s) / fromIntegral (pxY s)
- getDim dir = case (dimension dir attr) of
- Just (Percent _) -> Nothing
- Just dim -> Just $ inPoints opts dim
- Nothing -> Nothing
-
-inPoints :: WriterOptions -> Dimension -> Double
-inPoints opts dim = 72 * inInch opts dim
-
-inInch :: WriterOptions -> Dimension -> Double
-inInch opts dim =
- case dim of
- (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts)
- (Centimeter a) -> a * 0.3937007874
- (Inch a) -> a
- (Percent _) -> 0
-
--- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000".
--- Note: Dimensions in percentages are converted to the empty string.
-showInInch :: WriterOptions -> Dimension -> String
-showInInch _ (Percent _) = ""
-showInInch opts dim = showFl $ inInch opts dim
-
--- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600".
--- Note: Dimensions in percentages are converted to the empty string.
-showInPixel :: WriterOptions -> Dimension -> String
-showInPixel opts dim =
- case dim of
- (Pixel a) -> show a
- (Centimeter a) -> show (floor $ dpi * a * 0.3937007874 :: Int)
- (Inch a) -> show (floor $ dpi * a :: Int)
- (Percent _) -> ""
- where
- dpi = fromIntegral $ writerDpi opts
-
--- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm")
-numUnit :: String -> Maybe (Double, String)
-numUnit s =
- let (nums, unit) = span (\c -> isDigit c || ('.'==c)) s
- in case safeRead nums of
- Just n -> Just (n, unit)
- Nothing -> Nothing
-
--- | Read a Dimension from an Attr attribute.
--- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc.
-dimension :: Direction -> Attr -> Maybe Dimension
-dimension dir (_, _, kvs) =
- case dir of
- Width -> extractDim "width"
- Height -> extractDim "height"
- where
- extractDim key =
- case lookup key kvs of
- Just str ->
- case numUnit str of
- Just (num, unit) -> toDim num unit
- Nothing -> Nothing
- Nothing -> Nothing
- toDim a "cm" = Just $ Centimeter a
- toDim a "mm" = Just $ Centimeter (a / 10)
- toDim a "in" = Just $ Inch a
- toDim a "inch" = Just $ Inch a
- toDim a "%" = Just $ Percent a
- toDim a "px" = Just $ Pixel (floor a::Integer)
- toDim a "" = Just $ Pixel (floor a::Integer)
- toDim _ _ = Nothing
-
-epsSize :: ByteString -> Maybe ImageSize
-epsSize img = do
- let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img
- let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls
- case ls' of
- [] -> mzero
- (x:_) -> case B.words x of
- (_:_:_:ux:uy:[]) -> do
- ux' <- safeRead $ B.unpack ux
- uy' <- safeRead $ B.unpack uy
- return ImageSize{
- pxX = ux'
- , pxY = uy'
- , dpiX = 72
- , dpiY = 72 }
- _ -> mzero
-
-pngSize :: ByteString -> Maybe ImageSize
-pngSize img = do
- let (h, rest) = B.splitAt 8 img
- guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
- h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
- let (i, rest') = B.splitAt 4 $ B.drop 4 rest
- guard $ i == "MHDR" || i == "IHDR"
- let (sizes, rest'') = B.splitAt 8 rest'
- (x,y) <- case map fromIntegral $ unpack $ sizes of
- ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
- ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
- (shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
- _ -> Nothing -- "PNG parse error"
- let (dpix, dpiy) = findpHYs rest''
- return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
-
-findpHYs :: ByteString -> (Integer, Integer)
-findpHYs x =
- if B.null x || "IDAT" `B.isPrefixOf` x
- then (72,72) -- default, no pHYs
- else if "pHYs" `B.isPrefixOf` x
- then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral
- $ unpack $ B.take 9 $ B.drop 4 x
- factor = if u == 1 -- dots per meter
- then \z -> z * 254 `div` 10000
- else const 72
- in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4,
- factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 )
- else findpHYs $ B.drop 1 x -- read another byte
-
-gifSize :: ByteString -> Maybe ImageSize
-gifSize img = do
- let (h, rest) = B.splitAt 6 img
- guard $ h == "GIF87a" || h == "GIF89a"
- case map fromIntegral $ unpack $ B.take 4 rest of
- [w2,w1,h2,h1] -> return ImageSize {
- pxX = shift w1 8 + w2,
- pxY = shift h1 8 + h2,
- dpiX = 72,
- dpiY = 72
- }
- _ -> Nothing -- "GIF parse error"
-
-jpegSize :: ByteString -> Either String ImageSize
-jpegSize img =
- let (hdr, rest) = B.splitAt 4 img
- in if B.length rest < 14
- then Left "unable to determine JPEG size"
- else case hdr of
- "\xff\xd8\xff\xe0" -> jfifSize rest
- "\xff\xd8\xff\xe1" -> exifSize rest
- _ -> Left "unable to determine JPEG size"
-
-jfifSize :: ByteString -> Either String ImageSize
-jfifSize rest =
- let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
- $ unpack $ B.take 5 $ B.drop 9 $ rest
- factor = case dpiDensity of
- 1 -> id
- 2 -> \x -> (x * 254 `div` 10)
- _ -> const 72
- dpix = factor (shift dpix1 8 + dpix2)
- dpiy = factor (shift dpiy1 8 + dpiy2)
- in case findJfifSize rest of
- Left msg -> Left msg
- Right (w,h) -> Right $ ImageSize { pxX = w
- , pxY = h
- , dpiX = dpix
- , dpiY = dpiy }
-
-findJfifSize :: ByteString -> Either String (Integer,Integer)
-findJfifSize bs =
- let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
- in case B.uncons bs' of
- Just (c,bs'') | c >= '\xc0' && c <= '\xc3' ->
- case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
- [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2)
- _ -> Left "JFIF parse error"
- Just (_,bs'') ->
- case map fromIntegral $ unpack $ B.take 2 bs'' of
- [c1,c2] ->
- let len = shift c1 8 + c2
- -- skip variables
- in findJfifSize $ B.drop len bs''
- _ -> Left "JFIF parse error"
- Nothing -> Left "Did not find JFIF length record"
-
-runGet' :: Get (Either String a) -> BL.ByteString -> Either String a
-runGet' p bl =
-#if MIN_VERSION_binary(0,7,0)
- case runGetOrFail p bl of
- Left (_,_,msg) -> Left msg
- Right (_,_,x) -> x
-#else
- runGet p bl
-#endif
-
-
-exifSize :: ByteString -> Either String ImageSize
-exifSize bs = runGet' header $ bl
- where bl = BL.fromChunks [bs]
- header = runExceptT $ exifHeader bl
--- NOTE: It would be nicer to do
--- runGet ((Just <$> exifHeader) <|> return Nothing)
--- which would prevent pandoc from raising an error when an exif header can't
--- be parsed. But we only get an Alternative instance for Get in binary 0.6,
--- and binary 0.5 ships with ghc 7.6.
-
-exifHeader :: BL.ByteString -> ExceptT String Get ImageSize
-exifHeader hdr = do
- _app1DataSize <- lift getWord16be
- exifHdr <- lift getWord32be
- unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
- zeros <- lift getWord16be
- unless (zeros == 0) $ throwError "Expected zeros after exif header"
- -- beginning of tiff header -- we read whole thing to use
- -- in getting data from offsets:
- let tiffHeader = BL.drop 8 hdr
- byteAlign <- lift getWord16be
- let bigEndian = byteAlign == 0x4d4d
- let (getWord16, getWord32, getWord64) =
- if bigEndian
- then (getWord16be, getWord32be, getWord64be)
- else (getWord16le, getWord32le, getWord64le)
- let getRational = do
- num <- getWord32
- den <- getWord32
- return $ fromIntegral num / fromIntegral den
- tagmark <- lift getWord16
- unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
- ifdOffset <- lift getWord32
- lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
- numentries <- lift getWord16
- let ifdEntry :: ExceptT String Get (TagType, DataFormat)
- ifdEntry = do
- tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
- <$> lift getWord16
- dataFormat <- lift getWord16
- numComponents <- lift getWord32
- (fmt, bytesPerComponent) <-
- case dataFormat of
- 1 -> return (UnsignedByte <$> getWord8, 1)
- 2 -> return (AsciiString <$>
- getLazyByteString
- (fromIntegral numComponents), 1)
- 3 -> return (UnsignedShort <$> getWord16, 2)
- 4 -> return (UnsignedLong <$> getWord32, 4)
- 5 -> return (UnsignedRational <$> getRational, 8)
- 6 -> return (SignedByte <$> getWord8, 1)
- 7 -> return (Undefined <$> getLazyByteString
- (fromIntegral numComponents), 1)
- 8 -> return (SignedShort <$> getWord16, 2)
- 9 -> return (SignedLong <$> getWord32, 4)
- 10 -> return (SignedRational <$> getRational, 8)
- 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4)
- 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8)
- _ -> throwError $ "Unknown data format " ++ show dataFormat
- let totalBytes = fromIntegral $ numComponents * bytesPerComponent
- payload <- if totalBytes <= 4 -- data is right here
- then lift $ fmt <* skip (4 - totalBytes)
- else do -- get data from offset
- offs <- lift getWord32
- let bytesAtOffset =
- BL.take (fromIntegral totalBytes)
- $ BL.drop (fromIntegral offs) tiffHeader
- case runGet' (Right <$> fmt) bytesAtOffset of
- Left msg -> throwError msg
- Right x -> return x
- return (tag, payload)
- entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
- subentries <- case lookup ExifOffset entries of
- Just (UnsignedLong offset') -> do
- pos <- lift bytesRead
- lift $ skip (fromIntegral offset' - (fromIntegral pos - 8))
- numsubentries <- lift getWord16
- sequence $
- replicate (fromIntegral numsubentries) ifdEntry
- _ -> return []
- let allentries = entries ++ subentries
- (wdth, hght) <- case (lookup ExifImageWidth allentries,
- lookup ExifImageHeight allentries) of
- (Just (UnsignedLong w), Just (UnsignedLong h)) ->
- return (fromIntegral w, fromIntegral h)
- _ -> return defaultSize
- -- we return a default width and height when
- -- the exif header doesn't contain these
- let resfactor = case lookup ResolutionUnit allentries of
- Just (UnsignedShort 1) -> (100 / 254)
- _ -> 1
- let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
- $ lookup XResolution allentries
- let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
- $ lookup YResolution allentries
- return $ ImageSize{
- pxX = wdth
- , pxY = hght
- , dpiX = xres
- , dpiY = yres }
-
-data DataFormat = UnsignedByte Word8
- | AsciiString BL.ByteString
- | UnsignedShort Word16
- | UnsignedLong Word32
- | UnsignedRational Rational
- | SignedByte Word8
- | Undefined BL.ByteString
- | SignedShort Word16
- | SignedLong Word32
- | SignedRational Rational
- | SingleFloat Word32
- | DoubleFloat Word64
- deriving (Show)
-
-data TagType = ImageDescription
- | Make
- | Model
- | Orientation
- | XResolution
- | YResolution
- | ResolutionUnit
- | Software
- | DateTime
- | WhitePoint
- | PrimaryChromaticities
- | YCbCrCoefficients
- | YCbCrPositioning
- | ReferenceBlackWhite
- | Copyright
- | ExifOffset
- | ExposureTime
- | FNumber
- | ExposureProgram
- | ISOSpeedRatings
- | ExifVersion
- | DateTimeOriginal
- | DateTimeDigitized
- | ComponentConfiguration
- | CompressedBitsPerPixel
- | ShutterSpeedValue
- | ApertureValue
- | BrightnessValue
- | ExposureBiasValue
- | MaxApertureValue
- | SubjectDistance
- | MeteringMode
- | LightSource
- | Flash
- | FocalLength
- | MakerNote
- | UserComment
- | FlashPixVersion
- | ColorSpace
- | ExifImageWidth
- | ExifImageHeight
- | RelatedSoundFile
- | ExifInteroperabilityOffset
- | FocalPlaneXResolution
- | FocalPlaneYResolution
- | FocalPlaneResolutionUnit
- | SensingMethod
- | FileSource
- | SceneType
- | UnknownTagType
- deriving (Show, Eq, Ord)
-
-tagTypeTable :: M.Map Word16 TagType
-tagTypeTable = M.fromList
- [ (0x010e, ImageDescription)
- , (0x010f, Make)
- , (0x0110, Model)
- , (0x0112, Orientation)
- , (0x011a, XResolution)
- , (0x011b, YResolution)
- , (0x0128, ResolutionUnit)
- , (0x0131, Software)
- , (0x0132, DateTime)
- , (0x013e, WhitePoint)
- , (0x013f, PrimaryChromaticities)
- , (0x0211, YCbCrCoefficients)
- , (0x0213, YCbCrPositioning)
- , (0x0214, ReferenceBlackWhite)
- , (0x8298, Copyright)
- , (0x8769, ExifOffset)
- , (0x829a, ExposureTime)
- , (0x829d, FNumber)
- , (0x8822, ExposureProgram)
- , (0x8827, ISOSpeedRatings)
- , (0x9000, ExifVersion)
- , (0x9003, DateTimeOriginal)
- , (0x9004, DateTimeDigitized)
- , (0x9101, ComponentConfiguration)
- , (0x9102, CompressedBitsPerPixel)
- , (0x9201, ShutterSpeedValue)
- , (0x9202, ApertureValue)
- , (0x9203, BrightnessValue)
- , (0x9204, ExposureBiasValue)
- , (0x9205, MaxApertureValue)
- , (0x9206, SubjectDistance)
- , (0x9207, MeteringMode)
- , (0x9208, LightSource)
- , (0x9209, Flash)
- , (0x920a, FocalLength)
- , (0x927c, MakerNote)
- , (0x9286, UserComment)
- , (0xa000, FlashPixVersion)
- , (0xa001, ColorSpace)
- , (0xa002, ExifImageWidth)
- , (0xa003, ExifImageHeight)
- , (0xa004, RelatedSoundFile)
- , (0xa005, ExifInteroperabilityOffset)
- , (0xa20e, FocalPlaneXResolution)
- , (0xa20f, FocalPlaneYResolution)
- , (0xa210, FocalPlaneResolutionUnit)
- , (0xa217, SensingMethod)
- , (0xa300, FileSource)
- , (0xa301, SceneType)
- ]
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
deleted file mode 100644
index 1f98d019e..000000000
--- a/src/Text/Pandoc/Logging.hs
+++ /dev/null
@@ -1,232 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-}
-{-
-Copyright (C) 2016-17 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-{- |
- Module : Text.Pandoc.Logging
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-This module provides data types and functions for warnings
-and info messages.
-
--}
-module Text.Pandoc.Logging (
- Verbosity(..)
- , LogMessage(..)
- , encodeLogMessages
- , showLogMessage
- , messageVerbosity
- ) where
-
-import Text.Parsec.Pos
-import Data.Data (Data)
-import Data.Generics (Typeable)
-import GHC.Generics (Generic)
-import qualified Data.Text as Text
-import Data.Aeson
-import Text.Pandoc.Definition
-import Data.Aeson.Encode.Pretty (encodePretty', keyOrder,
- defConfig, Config(..))
-import qualified Data.ByteString.Lazy as BL
-
--- | Verbosity level.
-data Verbosity = ERROR | WARNING | INFO | DEBUG
- deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic)
-
-instance ToJSON Verbosity where
- toJSON x = toJSON (show x)
-
-data LogMessage =
- SkippedContent String SourcePos
- | CouldNotParseYamlMetadata String SourcePos
- | DuplicateLinkReference String SourcePos
- | DuplicateNoteReference String SourcePos
- | ReferenceNotFound String SourcePos
- | CircularReference String SourcePos
- | ParsingUnescaped String SourcePos
- | CouldNotLoadIncludeFile String SourcePos
- | ParsingTrace String SourcePos
- | InlineNotRendered Inline
- | BlockNotRendered Block
- | DocxParserWarning String
- | CouldNotFetchResource String String
- | CouldNotDetermineImageSize String String
- | CouldNotDetermineMimeType String
- | CouldNotConvertTeXMath String String
- deriving (Show, Eq, Data, Ord, Typeable, Generic)
-
-instance ToJSON LogMessage where
- toJSON x = object $ "verbosity" .= toJSON (messageVerbosity x) :
- case x of
- SkippedContent s pos ->
- ["type" .= String "SkippedContent",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= sourceLine pos,
- "column" .= sourceColumn pos]
- CouldNotParseYamlMetadata s pos ->
- ["type" .= String "YamlSectionNotAnObject",
- "message" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- DuplicateLinkReference s pos ->
- ["type" .= String "DuplicateLinkReference",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- DuplicateNoteReference s pos ->
- ["type" .= String "DuplicateNoteReference",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- ReferenceNotFound s pos ->
- ["type" .= String "ReferenceNotFound",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- CircularReference s pos ->
- ["type" .= String "CircularReference",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- ParsingUnescaped s pos ->
- ["type" .= String "ParsingUnescaped",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- CouldNotLoadIncludeFile fp pos ->
- ["type" .= String "CouldNotLoadIncludeFile",
- "path" .= Text.pack fp,
- "source" .= Text.pack (sourceName pos),
- "line" .= toJSON (sourceLine pos),
- "column" .= toJSON (sourceColumn pos)]
- ParsingTrace s pos ->
- ["type" .= String "ParsingTrace",
- "contents" .= Text.pack s,
- "source" .= Text.pack (sourceName pos),
- "line" .= sourceLine pos,
- "column" .= sourceColumn pos]
- InlineNotRendered il ->
- ["type" .= String "InlineNotRendered",
- "contents" .= toJSON il]
- BlockNotRendered bl ->
- ["type" .= String "BlockNotRendered",
- "contents" .= toJSON bl]
- DocxParserWarning s ->
- ["type" .= String "DocxParserWarning",
- "contents" .= Text.pack s]
- CouldNotFetchResource fp s ->
- ["type" .= String "CouldNotFetchResource",
- "path" .= Text.pack fp,
- "message" .= Text.pack s]
- CouldNotDetermineImageSize fp s ->
- ["type" .= String "CouldNotDetermineImageSize",
- "path" .= Text.pack fp,
- "message" .= Text.pack s]
- CouldNotDetermineMimeType fp ->
- ["type" .= String "CouldNotDetermineMimeType",
- "path" .= Text.pack fp]
- CouldNotConvertTeXMath s msg ->
- ["type" .= String "CouldNotConvertTeXMath",
- "contents" .= Text.pack s,
- "message" .= Text.pack msg]
-
-showPos :: SourcePos -> String
-showPos pos = sn ++ "line " ++
- show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
- where sn = if sourceName pos == "source" || sourceName pos == ""
- then ""
- else sourceName pos ++ " "
-
-encodeLogMessages :: [LogMessage] -> BL.ByteString
-encodeLogMessages ms =
- encodePretty' defConfig{ confCompare =
- keyOrder [ "type", "verbosity", "contents", "message", "path",
- "source", "line", "column" ] } ms
-
-showLogMessage :: LogMessage -> String
-showLogMessage msg =
- case msg of
- SkippedContent s pos ->
- "Skipped '" ++ s ++ "' at " ++ showPos pos
- CouldNotParseYamlMetadata s pos ->
- "Could not parse YAML metadata at " ++ showPos pos ++
- if null s then "" else (": " ++ s)
- DuplicateLinkReference s pos ->
- "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos
- DuplicateNoteReference s pos ->
- "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos
- ReferenceNotFound s pos ->
- "Reference not found for '" ++ s ++ "' at " ++ showPos pos
- CircularReference s pos ->
- "Circular reference '" ++ s ++ "' at " ++ showPos pos
- ParsingUnescaped s pos ->
- "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos
- CouldNotLoadIncludeFile fp pos ->
- "Could not load include file '" ++ fp ++ "' at " ++ showPos pos
- ParsingTrace s pos ->
- "Parsing trace at " ++ showPos pos ++ ": " ++ s
- InlineNotRendered il ->
- "Not rendering " ++ show il
- BlockNotRendered bl ->
- "Not rendering " ++ show bl
- DocxParserWarning s ->
- "Docx parser warning: " ++ s
- CouldNotFetchResource fp s ->
- "Could not fetch resource '" ++ fp ++ "'" ++
- if null s then "" else (": " ++ s)
- CouldNotDetermineImageSize fp s ->
- "Could not determine image size for '" ++ fp ++ "'" ++
- if null s then "" else (": " ++ s)
- CouldNotDetermineMimeType fp ->
- "Could not determine mime type for '" ++ fp ++ "'"
- CouldNotConvertTeXMath s m ->
- "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++
- if null m then "" else (':':'\n':m)
-
-messageVerbosity:: LogMessage -> Verbosity
-messageVerbosity msg =
- case msg of
- SkippedContent{} -> INFO
- CouldNotParseYamlMetadata{} -> WARNING
- DuplicateLinkReference{} -> WARNING
- DuplicateNoteReference{} -> WARNING
- ReferenceNotFound{} -> WARNING
- CircularReference{} -> WARNING
- CouldNotLoadIncludeFile{} -> WARNING
- ParsingUnescaped{} -> INFO
- ParsingTrace{} -> DEBUG
- InlineNotRendered{} -> INFO
- BlockNotRendered{} -> INFO
- DocxParserWarning{} -> WARNING
- CouldNotFetchResource{} -> WARNING
- CouldNotDetermineImageSize{} -> WARNING
- CouldNotDetermineMimeType{} -> WARNING
- CouldNotConvertTeXMath{} -> WARNING
-
-
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
deleted file mode 100644
index a08091217..000000000
--- a/src/Text/Pandoc/MIME.hs
+++ /dev/null
@@ -1,527 +0,0 @@
-{-
-Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.MIME
- Copyright : Copyright (C) 2011-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Mime type lookup for ODT writer.
--}
-module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
- extensionFromMimeType )where
-import System.FilePath
-import Data.Char ( toLower )
-import Data.List (isPrefixOf, isSuffixOf)
-import Data.Maybe (fromMaybe)
-import qualified Data.Map as M
-
-type MimeType = String
-
--- | Determine mime type appropriate for file path.
-getMimeType :: FilePath -> Maybe MimeType
-getMimeType fp
- -- ODT
- | fp == "layout-cache" =
- Just "application/binary"
- | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp =
- Just "application/vnd.oasis.opendocument.formula"
- -- generic
- | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes
-
--- | Determime mime type appropriate for file path, defaulting to
--- “application/octet-stream” if nothing else fits.
-getMimeTypeDef :: FilePath -> MimeType
-getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType
-
-extensionFromMimeType :: MimeType -> Maybe String
-extensionFromMimeType mimetype =
- M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
- -- note: we just look up the basic mime type, dropping the content-encoding etc.
-
-reverseMimeTypes :: M.Map MimeType String
-reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
-
-mimeTypes :: M.Map String MimeType
-mimeTypes = M.fromList mimeTypesList
-
-mimeTypesList :: [(String, MimeType)]
-mimeTypesList = -- List borrowed from happstack-server.
- [("gz","application/x-gzip")
- ,("cabal","application/x-cabal")
- ,("%","application/x-trash")
- ,("323","text/h323")
- ,("3gp","video/3gpp")
- ,("7z","application/x-7z-compressed")
- ,("abw","application/x-abiword")
- ,("ai","application/postscript")
- ,("aif","audio/x-aiff")
- ,("aifc","audio/x-aiff")
- ,("aiff","audio/x-aiff")
- ,("alc","chemical/x-alchemy")
- ,("art","image/x-jg")
- ,("asc","text/plain")
- ,("asf","video/x-ms-asf")
- ,("asn","chemical/x-ncbi-asn1")
- ,("aso","chemical/x-ncbi-asn1-binary")
- ,("asx","video/x-ms-asf")
- ,("atom","application/atom")
- ,("atomcat","application/atomcat+xml")
- ,("atomsrv","application/atomserv+xml")
- ,("au","audio/basic")
- ,("avi","video/x-msvideo")
- ,("b","chemical/x-molconn-Z")
- ,("bak","application/x-trash")
- ,("bat","application/x-msdos-program")
- ,("bcpio","application/x-bcpio")
- ,("bib","text/x-bibtex")
- ,("bin","application/octet-stream")
- ,("bmp","image/x-ms-bmp")
- ,("boo","text/x-boo")
- ,("book","application/x-maker")
- ,("bsd","chemical/x-crossfire")
- ,("c","text/x-csrc")
- ,("c++","text/x-c++src")
- ,("c3d","chemical/x-chem3d")
- ,("cab","application/x-cab")
- ,("cac","chemical/x-cache")
- ,("cache","chemical/x-cache")
- ,("cap","application/cap")
- ,("cascii","chemical/x-cactvs-binary")
- ,("cat","application/vnd.ms-pki.seccat")
- ,("cbin","chemical/x-cactvs-binary")
- ,("cbr","application/x-cbr")
- ,("cbz","application/x-cbz")
- ,("cc","text/x-c++src")
- ,("cdf","application/x-cdf")
- ,("cdr","image/x-coreldraw")
- ,("cdt","image/x-coreldrawtemplate")
- ,("cdx","chemical/x-cdx")
- ,("cdy","application/vnd.cinderella")
- ,("cef","chemical/x-cxf")
- ,("cer","chemical/x-cerius")
- ,("chm","chemical/x-chemdraw")
- ,("chrt","application/x-kchart")
- ,("cif","chemical/x-cif")
- ,("class","application/java-vm")
- ,("cls","text/x-tex")
- ,("cmdf","chemical/x-cmdf")
- ,("cml","chemical/x-cml")
- ,("cod","application/vnd.rim.cod")
- ,("com","application/x-msdos-program")
- ,("cpa","chemical/x-compass")
- ,("cpio","application/x-cpio")
- ,("cpp","text/x-c++src")
- ,("cpt","application/mac-compactpro")
- ,("crl","application/x-pkcs7-crl")
- ,("crt","application/x-x509-ca-cert")
- ,("csf","chemical/x-cache-csf")
- ,("csh","application/x-csh")
- ,("csm","chemical/x-csml")
- ,("csml","chemical/x-csml")
- ,("css","text/css")
- ,("csv","text/csv")
- ,("ctab","chemical/x-cactvs-binary")
- ,("ctx","chemical/x-ctx")
- ,("cu","application/cu-seeme")
- ,("cub","chemical/x-gaussian-cube")
- ,("cxf","chemical/x-cxf")
- ,("cxx","text/x-c++src")
- ,("d","text/x-dsrc")
- ,("dat","chemical/x-mopac-input")
- ,("dcr","application/x-director")
- ,("deb","application/x-debian-package")
- ,("dif","video/dv")
- ,("diff","text/x-diff")
- ,("dir","application/x-director")
- ,("djv","image/vnd.djvu")
- ,("djvu","image/vnd.djvu")
- ,("dl","video/dl")
- ,("dll","application/x-msdos-program")
- ,("dmg","application/x-apple-diskimage")
- ,("dms","application/x-dms")
- ,("doc","application/msword")
- ,("dot","application/msword")
- ,("dv","video/dv")
- ,("dvi","application/x-dvi")
- ,("dx","chemical/x-jcamp-dx")
- ,("dxr","application/x-director")
- ,("emb","chemical/x-embl-dl-nucleotide")
- ,("embl","chemical/x-embl-dl-nucleotide")
- ,("emf","image/x-emf")
- ,("eml","message/rfc822")
- ,("ent","chemical/x-ncbi-asn1-ascii")
- ,("eot","application/vnd.ms-fontobject")
- ,("eps","application/postscript")
- ,("etx","text/x-setext")
- ,("exe","application/x-msdos-program")
- ,("ez","application/andrew-inset")
- ,("fb","application/x-maker")
- ,("fbdoc","application/x-maker")
- ,("fch","chemical/x-gaussian-checkpoint")
- ,("fchk","chemical/x-gaussian-checkpoint")
- ,("fig","application/x-xfig")
- ,("flac","application/x-flac")
- ,("fli","video/fli")
- ,("fm","application/x-maker")
- ,("frame","application/x-maker")
- ,("frm","application/x-maker")
- ,("fs","text/plain")
- ,("gal","chemical/x-gaussian-log")
- ,("gam","chemical/x-gamess-input")
- ,("gamin","chemical/x-gamess-input")
- ,("gau","chemical/x-gaussian-input")
- ,("gcd","text/x-pcs-gcd")
- ,("gcf","application/x-graphing-calculator")
- ,("gcg","chemical/x-gcg8-sequence")
- ,("gen","chemical/x-genbank")
- ,("gf","application/x-tex-gf")
- ,("gif","image/gif")
- ,("gjc","chemical/x-gaussian-input")
- ,("gjf","chemical/x-gaussian-input")
- ,("gl","video/gl")
- ,("gnumeric","application/x-gnumeric")
- ,("gpt","chemical/x-mopac-graph")
- ,("gsf","application/x-font")
- ,("gsm","audio/x-gsm")
- ,("gtar","application/x-gtar")
- ,("h","text/x-chdr")
- ,("h++","text/x-c++hdr")
- ,("hdf","application/x-hdf")
- ,("hh","text/x-c++hdr")
- ,("hin","chemical/x-hin")
- ,("hpp","text/x-c++hdr")
- ,("hqx","application/mac-binhex40")
- ,("hs","text/x-haskell")
- ,("hta","application/hta")
- ,("htc","text/x-component")
- ,("htm","text/html")
- ,("html","text/html")
- ,("hxx","text/x-c++hdr")
- ,("ica","application/x-ica")
- ,("ice","x-conference/x-cooltalk")
- ,("ico","image/x-icon")
- ,("ics","text/calendar")
- ,("icz","text/calendar")
- ,("ief","image/ief")
- ,("iges","model/iges")
- ,("igs","model/iges")
- ,("iii","application/x-iphone")
- ,("inp","chemical/x-gamess-input")
- ,("ins","application/x-internet-signup")
- ,("iso","application/x-iso9660-image")
- ,("isp","application/x-internet-signup")
- ,("ist","chemical/x-isostar")
- ,("istr","chemical/x-isostar")
- ,("jad","text/vnd.sun.j2me.app-descriptor")
- ,("jar","application/java-archive")
- ,("java","text/x-java")
- ,("jdx","chemical/x-jcamp-dx")
- ,("jmz","application/x-jmol")
- ,("jng","image/x-jng")
- ,("jnlp","application/x-java-jnlp-file")
- ,("jpe","image/jpeg")
- ,("jpeg","image/jpeg")
- ,("jfif","image/jpeg")
- ,("jpg","image/jpeg")
- ,("js","application/x-javascript")
- ,("kar","audio/midi")
- ,("key","application/pgp-keys")
- ,("kil","application/x-killustrator")
- ,("kin","chemical/x-kinemage")
- ,("kml","application/vnd.google-earth.kml+xml")
- ,("kmz","application/vnd.google-earth.kmz")
- ,("kpr","application/x-kpresenter")
- ,("kpt","application/x-kpresenter")
- ,("ksp","application/x-kspread")
- ,("kwd","application/x-kword")
- ,("kwt","application/x-kword")
- ,("latex","application/x-latex")
- ,("lha","application/x-lha")
- ,("lhs","text/x-literate-haskell")
- ,("lsf","video/x-la-asf")
- ,("lsx","video/x-la-asf")
- ,("ltx","text/x-tex")
- ,("lyx","application/x-lyx")
- ,("lzh","application/x-lzh")
- ,("lzx","application/x-lzx")
- ,("m3u","audio/mpegurl")
- ,("m4a","audio/mpeg")
- ,("m4v","video/x-m4v")
- ,("maker","application/x-maker")
- ,("man","application/x-troff-man")
- ,("mcif","chemical/x-mmcif")
- ,("mcm","chemical/x-macmolecule")
- ,("mdb","application/msaccess")
- ,("me","application/x-troff-me")
- ,("mesh","model/mesh")
- ,("mid","audio/midi")
- ,("midi","audio/midi")
- ,("mif","application/x-mif")
- ,("mm","application/x-freemind")
- ,("mmd","chemical/x-macromodel-input")
- ,("mmf","application/vnd.smaf")
- ,("mml","text/mathml")
- ,("mmod","chemical/x-macromodel-input")
- ,("mng","video/x-mng")
- ,("moc","text/x-moc")
- ,("mol","chemical/x-mdl-molfile")
- ,("mol2","chemical/x-mol2")
- ,("moo","chemical/x-mopac-out")
- ,("mop","chemical/x-mopac-input")
- ,("mopcrt","chemical/x-mopac-input")
- ,("mov","video/quicktime")
- ,("movie","video/x-sgi-movie")
- ,("mp2","audio/mpeg")
- ,("mp3","audio/mpeg")
- ,("mp4","video/mp4")
- ,("mpc","chemical/x-mopac-input")
- ,("mpe","video/mpeg")
- ,("mpeg","video/mpeg")
- ,("mpega","audio/mpeg")
- ,("mpg","video/mpeg")
- ,("mpga","audio/mpeg")
- ,("ms","application/x-troff-ms")
- ,("msh","model/mesh")
- ,("msi","application/x-msi")
- ,("mvb","chemical/x-mopac-vib")
- ,("mxu","video/vnd.mpegurl")
- ,("nb","application/mathematica")
- ,("nc","application/x-netcdf")
- ,("nwc","application/x-nwc")
- ,("o","application/x-object")
- ,("oda","application/oda")
- ,("odb","application/vnd.oasis.opendocument.database")
- ,("odc","application/vnd.oasis.opendocument.chart")
- ,("odf","application/vnd.oasis.opendocument.formula")
- ,("odg","application/vnd.oasis.opendocument.graphics")
- ,("odi","application/vnd.oasis.opendocument.image")
- ,("odm","application/vnd.oasis.opendocument.text-master")
- ,("odp","application/vnd.oasis.opendocument.presentation")
- ,("ods","application/vnd.oasis.opendocument.spreadsheet")
- ,("odt","application/vnd.oasis.opendocument.text")
- ,("oga","audio/ogg")
- ,("ogg","application/ogg")
- ,("ogv","video/ogg")
- ,("ogx","application/ogg")
- ,("old","application/x-trash")
- ,("otg","application/vnd.oasis.opendocument.graphics-template")
- ,("oth","application/vnd.oasis.opendocument.text-web")
- ,("otp","application/vnd.oasis.opendocument.presentation-template")
- ,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
- ,("otf","application/vnd.ms-opentype")
- ,("ott","application/vnd.oasis.opendocument.text-template")
- ,("oza","application/x-oz-application")
- ,("p","text/x-pascal")
- ,("p7r","application/x-pkcs7-certreqresp")
- ,("pac","application/x-ns-proxy-autoconfig")
- ,("pas","text/x-pascal")
- ,("pat","image/x-coreldrawpattern")
- ,("patch","text/x-diff")
- ,("pbm","image/x-portable-bitmap")
- ,("pcap","application/cap")
- ,("pcf","application/x-font")
- ,("pcf.Z","application/x-font")
- ,("pcx","image/pcx")
- ,("pdb","chemical/x-pdb")
- ,("pdf","application/pdf")
- ,("pfa","application/x-font")
- ,("pfb","application/x-font")
- ,("pgm","image/x-portable-graymap")
- ,("pgn","application/x-chess-pgn")
- ,("pgp","application/pgp-signature")
- ,("php","application/x-httpd-php")
- ,("php3","application/x-httpd-php3")
- ,("php3p","application/x-httpd-php3-preprocessed")
- ,("php4","application/x-httpd-php4")
- ,("phps","application/x-httpd-php-source")
- ,("pht","application/x-httpd-php")
- ,("phtml","application/x-httpd-php")
- ,("pk","application/x-tex-pk")
- ,("pl","text/x-perl")
- ,("pls","audio/x-scpls")
- ,("pm","text/x-perl")
- ,("png","image/png")
- ,("pnm","image/x-portable-anymap")
- ,("pot","text/plain")
- ,("ppm","image/x-portable-pixmap")
- ,("pps","application/vnd.ms-powerpoint")
- ,("ppt","application/vnd.ms-powerpoint")
- ,("prf","application/pics-rules")
- ,("prt","chemical/x-ncbi-asn1-ascii")
- ,("ps","application/postscript")
- ,("psd","image/x-photoshop")
- ,("py","text/x-python")
- ,("pyc","application/x-python-code")
- ,("pyo","application/x-python-code")
- ,("qt","video/quicktime")
- ,("qtl","application/x-quicktimeplayer")
- ,("ra","audio/x-pn-realaudio")
- ,("ram","audio/x-pn-realaudio")
- ,("rar","application/rar")
- ,("ras","image/x-cmu-raster")
- ,("rd","chemical/x-mdl-rdfile")
- ,("rdf","application/rdf+xml")
- ,("rgb","image/x-rgb")
- ,("rhtml","application/x-httpd-eruby")
- ,("rm","audio/x-pn-realaudio")
- ,("roff","application/x-troff")
- ,("ros","chemical/x-rosdal")
- ,("rpm","application/x-redhat-package-manager")
- ,("rss","application/rss+xml")
- ,("rtf","application/rtf")
- ,("rtx","text/richtext")
- ,("rxn","chemical/x-mdl-rxnfile")
- ,("sct","text/scriptlet")
- ,("sd","chemical/x-mdl-sdfile")
- ,("sd2","audio/x-sd2")
- ,("sda","application/vnd.stardivision.draw")
- ,("sdc","application/vnd.stardivision.calc")
- ,("sdd","application/vnd.stardivision.impress")
- ,("sdf","application/vnd.stardivision.math")
- ,("sds","application/vnd.stardivision.chart")
- ,("sdw","application/vnd.stardivision.writer")
- ,("ser","application/java-serialized-object")
- ,("sgf","application/x-go-sgf")
- ,("sgl","application/vnd.stardivision.writer-global")
- ,("sh","application/x-sh")
- ,("shar","application/x-shar")
- ,("shtml","text/html")
- ,("sid","audio/prs.sid")
- ,("sik","application/x-trash")
- ,("silo","model/mesh")
- ,("sis","application/vnd.symbian.install")
- ,("sisx","x-epoc/x-sisx-app")
- ,("sit","application/x-stuffit")
- ,("sitx","application/x-stuffit")
- ,("skd","application/x-koan")
- ,("skm","application/x-koan")
- ,("skp","application/x-koan")
- ,("skt","application/x-koan")
- ,("smi","application/smil")
- ,("smil","application/smil")
- ,("snd","audio/basic")
- ,("spc","chemical/x-galactic-spc")
- ,("spl","application/futuresplash")
- ,("spx","audio/ogg")
- ,("src","application/x-wais-source")
- ,("stc","application/vnd.sun.xml.calc.template")
- ,("std","application/vnd.sun.xml.draw.template")
- ,("sti","application/vnd.sun.xml.impress.template")
- ,("stl","application/vnd.ms-pki.stl")
- ,("stw","application/vnd.sun.xml.writer.template")
- ,("sty","text/x-tex")
- ,("sv4cpio","application/x-sv4cpio")
- ,("sv4crc","application/x-sv4crc")
- ,("svg","image/svg+xml")
- -- removed for now, since it causes problems with
- -- extensionFromMimeType: see #2183.
- -- ,("svgz","image/svg+xml")
- ,("sw","chemical/x-swissprot")
- ,("swf","application/x-shockwave-flash")
- ,("swfl","application/x-shockwave-flash")
- ,("sxc","application/vnd.sun.xml.calc")
- ,("sxd","application/vnd.sun.xml.draw")
- ,("sxg","application/vnd.sun.xml.writer.global")
- ,("sxi","application/vnd.sun.xml.impress")
- ,("sxm","application/vnd.sun.xml.math")
- ,("sxw","application/vnd.sun.xml.writer")
- ,("t","application/x-troff")
- ,("tar","application/x-tar")
- ,("taz","application/x-gtar")
- ,("tcl","application/x-tcl")
- ,("tex","text/x-tex")
- ,("texi","application/x-texinfo")
- ,("texinfo","application/x-texinfo")
- ,("text","text/plain")
- ,("tgf","chemical/x-mdl-tgf")
- ,("tgz","application/x-gtar")
- ,("tif","image/tiff")
- ,("tiff","image/tiff")
- ,("tk","text/x-tcl")
- ,("tm","text/texmacs")
- ,("torrent","application/x-bittorrent")
- ,("tr","application/x-troff")
- ,("ts","text/texmacs")
- ,("tsp","application/dsptype")
- ,("tsv","text/tab-separated-values")
- ,("ttf","application/x-font-truetype")
- ,("txt","text/plain")
- ,("udeb","application/x-debian-package")
- ,("uls","text/iuls")
- ,("ustar","application/x-ustar")
- ,("val","chemical/x-ncbi-asn1-binary")
- ,("vcd","application/x-cdlink")
- ,("vcf","text/x-vcard")
- ,("vcs","text/x-vcalendar")
- ,("vmd","chemical/x-vmd")
- ,("vms","chemical/x-vamas-iso14976")
- ,("vrm","x-world/x-vrml")
- ,("vrml","model/vrml")
- ,("vs","text/plain")
- ,("vsd","application/vnd.visio")
- ,("vtt","text/vtt")
- ,("wad","application/x-doom")
- ,("wav","audio/x-wav")
- ,("wax","audio/x-ms-wax")
- ,("wbmp","image/vnd.wap.wbmp")
- ,("wbxml","application/vnd.wap.wbxml")
- ,("webm","video/webm")
- ,("wk","application/x-123")
- ,("wm","video/x-ms-wm")
- ,("wma","audio/x-ms-wma")
- ,("wmd","application/x-ms-wmd")
- ,("wmf","image/x-wmf")
- ,("wml","text/vnd.wap.wml")
- ,("wmlc","application/vnd.wap.wmlc")
- ,("wmls","text/vnd.wap.wmlscript")
- ,("wmlsc","application/vnd.wap.wmlscriptc")
- ,("wmv","video/x-ms-wmv")
- ,("wmx","video/x-ms-wmx")
- ,("wmz","application/x-ms-wmz")
- ,("woff","application/font-woff")
- ,("woff2","font/woff2")
- ,("wp5","application/wordperfect5.1")
- ,("wpd","application/wordperfect")
- ,("wrl","model/vrml")
- ,("wsc","text/scriptlet")
- ,("wvx","video/x-ms-wvx")
- ,("wz","application/x-wingz")
- ,("xbm","image/x-xbitmap")
- ,("xcf","application/x-xcf")
- ,("xht","application/xhtml+xml")
- ,("xhtml","application/xhtml+xml")
- ,("xlb","application/vnd.ms-excel")
- ,("xls","application/vnd.ms-excel")
- ,("xlt","application/vnd.ms-excel")
- ,("xml","application/xml")
- ,("xpi","application/x-xpinstall")
- ,("xpm","image/x-xpixmap")
- ,("xsl","application/xml")
- ,("xtel","chemical/x-xtel")
- ,("xul","application/vnd.mozilla.xul+xml")
- ,("xwd","image/x-xwindowdump")
- ,("xyz","chemical/x-xyz")
- ,("zip","application/zip")
- ,("zmt","chemical/x-mopac-input")
- ]
-
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
deleted file mode 100644
index fe99be5fe..000000000
--- a/src/Text/Pandoc/MediaBag.hs
+++ /dev/null
@@ -1,113 +0,0 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
-{-
-Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.MediaBag
- Copyright : Copyright (C) 2014 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Definition of a MediaBag object to hold binary resources, and an
-interface for interacting with it.
--}
-module Text.Pandoc.MediaBag (
- MediaBag,
- lookupMedia,
- insertMedia,
- mediaDirectory,
- extractMediaBag
- ) where
-import System.FilePath
-import qualified System.FilePath.Posix as Posix
-import System.Directory (createDirectoryIfMissing)
-import qualified Data.Map as M
-import qualified Data.ByteString.Lazy as BL
-import Control.Monad (when)
-import Control.Monad.Trans (MonadIO(..))
-import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
-import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Maybe (fromMaybe)
-import System.IO (stderr)
-import Data.Data (Data)
-import Data.Typeable (Typeable)
-
--- | A container for a collection of binary resources, with names and
--- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
--- can be used for an empty 'MediaBag', and '<>' can be used to append
--- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString))
- deriving (Monoid, Data, Typeable)
-
-instance Show MediaBag where
- show bag = "MediaBag " ++ show (mediaDirectory bag)
-
--- | Insert a media item into a 'MediaBag', replacing any existing
--- value with the same name.
-insertMedia :: FilePath -- ^ relative path and canonical name of resource
- -> Maybe MimeType -- ^ mime type (Nothing = determine from extension)
- -> BL.ByteString -- ^ contents of resource
- -> MediaBag
- -> MediaBag
-insertMedia fp mbMime contents (MediaBag mediamap) =
- MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap)
- where mime = fromMaybe fallback mbMime
- fallback = case takeExtension fp of
- ".gz" -> getMimeTypeDef $ dropExtension fp
- _ -> getMimeTypeDef fp
-
--- | Lookup a media item in a 'MediaBag', returning mime type and contents.
-lookupMedia :: FilePath
- -> MediaBag
- -> Maybe (MimeType, BL.ByteString)
-lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap
-
--- | Get a list of the file paths stored in a 'MediaBag', with
--- their corresponding mime types and the lengths in bytes of the contents.
-mediaDirectory :: MediaBag -> [(String, MimeType, Int)]
-mediaDirectory (MediaBag mediamap) =
- M.foldWithKey (\fp (mime,contents) ->
- (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap
-
--- | Extract contents of MediaBag to a given directory. Print informational
--- messages if 'verbose' is true.
--- TODO: eventually we may want to put this into PandocMonad
--- In PandocPure, it could write to the fake file system...
-extractMediaBag :: MonadIO m
- => Bool
- -> FilePath
- -> MediaBag
- -> m ()
-extractMediaBag verbose dir (MediaBag mediamap) = liftIO $ do
- sequence_ $ M.foldWithKey
- (\fp (_ ,contents) ->
- ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap
-
-writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
-writeMedia verbose dir (subpath, bs) = do
- -- we join and split to convert a/b/c to a\b\c on Windows;
- -- in zip containers all paths use /
- let fullpath = dir </> normalise subpath
- createDirectoryIfMissing True $ takeDirectory fullpath
- when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
- BL.writeFile fullpath bs
-
-
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
deleted file mode 100644
index bc62f87d0..000000000
--- a/src/Text/Pandoc/Options.hs
+++ /dev/null
@@ -1,217 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Options
- Copyright : Copyright (C) 2012-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Data structures and functions for representing parser and writer
-options.
--}
-module Text.Pandoc.Options ( module Text.Pandoc.Extensions
- , ReaderOptions(..)
- , HTMLMathMethod (..)
- , CiteMethod (..)
- , ObfuscationMethod (..)
- , HTMLSlideVariant (..)
- , EPUBVersion (..)
- , WrapOption (..)
- , TopLevelDivision (..)
- , WriterOptions (..)
- , TrackChanges (..)
- , ReferenceLocation (..)
- , def
- , isEnabled
- ) where
-import Text.Pandoc.Extensions
-import Data.Default
-import Text.Pandoc.Highlighting (Style, pygments)
-import Data.Data (Data)
-import Data.Typeable (Typeable)
-import GHC.Generics (Generic)
-
-data ReaderOptions = ReaderOptions{
- readerExtensions :: Extensions -- ^ Syntax extensions
- , readerStandalone :: Bool -- ^ Standalone document with header
- , readerColumns :: Int -- ^ Number of columns in terminal
- , readerTabStop :: Int -- ^ Tab stop
- , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
- , readerIndentedCodeClasses :: [String] -- ^ Default classes for
- -- indented code blocks
- , readerDefaultImageExtension :: String -- ^ Default extension for images
- , readerTrackChanges :: TrackChanges
-} deriving (Show, Read, Data, Typeable, Generic)
-
-instance Default ReaderOptions
- where def = ReaderOptions{
- readerExtensions = emptyExtensions
- , readerStandalone = False
- , readerColumns = 80
- , readerTabStop = 4
- , readerApplyMacros = True
- , readerIndentedCodeClasses = []
- , readerDefaultImageExtension = ""
- , readerTrackChanges = AcceptChanges
- }
-
---
--- Writer options
---
-
-data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic)
-
-data HTMLMathMethod = PlainMath
- | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
- | JsMath (Maybe String) -- url of jsMath load script
- | GladTeX
- | WebTeX String -- url of TeX->image script.
- | MathML
- | MathJax String -- url of MathJax.js
- | KaTeX String String -- url of stylesheet and katex.js
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
-data CiteMethod = Citeproc -- use citeproc to render them
- | Natbib -- output natbib cite commands
- | Biblatex -- output biblatex cite commands
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Methods for obfuscating email addresses in HTML.
-data ObfuscationMethod = NoObfuscation
- | ReferenceObfuscation
- | JavascriptObfuscation
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Varieties of HTML slide shows.
-data HTMLSlideVariant = S5Slides
- | SlidySlides
- | SlideousSlides
- | DZSlides
- | RevealJsSlides
- | NoSlides
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Options for accepting or rejecting MS Word track-changes.
-data TrackChanges = AcceptChanges
- | RejectChanges
- | AllChanges
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Options for wrapping text in the output.
-data WrapOption = WrapAuto -- ^ Automatically wrap to width
- | WrapNone -- ^ No non-semantic newlines
- | WrapPreserve -- ^ Preserve wrapping of input source
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Options defining the type of top-level headers.
-data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts
- | TopLevelChapter -- ^ Top-level headers become chapters
- | TopLevelSection -- ^ Top-level headers become sections
- | TopLevelDefault -- ^ Top-level type is determined via
- -- heuristics
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Locations for footnotes and references in markdown output
-data ReferenceLocation = EndOfBlock -- ^ End of block
- | EndOfSection -- ^ prior to next section header (or end of document)
- | EndOfDocument -- ^ at end of document
- deriving (Show, Read, Eq, Data, Typeable, Generic)
-
--- | Options for writers
-data WriterOptions = WriterOptions
- { writerTemplate :: Maybe String -- ^ Template to use
- , writerVariables :: [(String, String)] -- ^ Variables to set in template
- , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerIncremental :: Bool -- ^ True if lists should be incremental
- , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ...
- , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
- , writerExtensions :: Extensions -- ^ Markdown extensions that can be used
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions
- , writerWrapText :: WrapOption -- ^ Option for wrapping text
- , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
- , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
- , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
- -- and for footnote marks in markdown
- , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file
- , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
- , writerCiteMethod :: CiteMethod -- ^ How to print cites
- , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML
- , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
- , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions
- , writerListings :: Bool -- ^ Use listings package for code
- , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting
- -- (Nothing = no highlighting)
- , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
- , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB
- , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line
- , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
- , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
- , writerTOCDepth :: Int -- ^ Number of levels to include in TOC
- , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
- , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
- , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
- } deriving (Show, Data, Typeable, Generic)
-
-instance Default WriterOptions where
- def = WriterOptions { writerTemplate = Nothing
- , writerVariables = []
- , writerTabStop = 4
- , writerTableOfContents = False
- , writerIncremental = False
- , writerHTMLMathMethod = PlainMath
- , writerNumberSections = False
- , writerNumberOffset = [0,0,0,0,0,0]
- , writerSectionDivs = False
- , writerExtensions = emptyExtensions
- , writerReferenceLinks = False
- , writerDpi = 96
- , writerWrapText = WrapAuto
- , writerColumns = 72
- , writerEmailObfuscation = NoObfuscation
- , writerIdentifierPrefix = ""
- , writerSourceURL = Nothing
- , writerUserDataDir = Nothing
- , writerCiteMethod = Citeproc
- , writerHtmlQTags = False
- , writerSlideLevel = Nothing
- , writerTopLevelDivision = TopLevelDefault
- , writerListings = False
- , writerHighlightStyle = Just pygments
- , writerSetextHeaders = True
- , writerEpubMetadata = Nothing
- , writerEpubStylesheet = Nothing
- , writerEpubFonts = []
- , writerEpubChapterLevel = 1
- , writerTOCDepth = 3
- , writerReferenceDoc = Nothing
- , writerLaTeXArgs = []
- , writerReferenceLocation = EndOfDocument
- }
-
--- | Returns True if the given extension is enabled.
-isEnabled :: Extension -> WriterOptions -> Bool
-isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts)
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
deleted file mode 100644
index 1b3b4eb88..000000000
--- a/src/Text/Pandoc/PDF.hs
+++ /dev/null
@@ -1,369 +0,0 @@
-{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-}
-{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.PDF
- Copyright : Copyright (C) 2012-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of LaTeX documents to PDF.
--}
-module Text.Pandoc.PDF ( makePDF ) where
-
-import Data.ByteString.Lazy (ByteString)
-import qualified Data.ByteString.Lazy as B
-import qualified Data.ByteString.Lazy.Char8 as BC
-import qualified Data.ByteString as BS
-import Data.Monoid ((<>))
-import System.Exit (ExitCode (..))
-import System.FilePath
-import System.IO (stdout)
-import System.IO.Temp (withTempFile)
-import System.Directory
-import Data.Digest.Pure.SHA (showDigest, sha1)
-import System.Environment
-import Control.Monad (unless, when, (<=<))
-import qualified Control.Exception as E
-import Data.List (isInfixOf)
-import Data.Maybe (fromMaybe)
-import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Definition
-import Text.Pandoc.MediaBag
-import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Shared (warn, withTempDir, inDirectory, stringify)
-import Text.Pandoc.Writers.Shared (getField, metaToJSON)
-import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
-import Text.Pandoc.Logging (Verbosity(..))
-import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
-import Text.Pandoc.Process (pipeProcess)
-import Control.Monad.Trans (MonadIO(..))
-import qualified Data.ByteString.Lazy as BL
-import qualified Codec.Picture as JP
-#ifdef _WINDOWS
-import Data.List (intercalate)
-#endif
-import Text.Pandoc.Class (PandocIO, runIOorExplode, fetchItem, setMediaBag, runIO)
-
-#ifdef _WINDOWS
-changePathSeparators :: FilePath -> FilePath
-changePathSeparators = intercalate "/" . splitDirectories
-#endif
-
-makePDF :: MonadIO m
- => String -- ^ pdf creator (pdflatex, lualatex,
- -- xelatex, context, wkhtmltopdf)
- -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer
- -> WriterOptions -- ^ options
- -> Verbosity -- ^ verbosity level
- -> MediaBag -- ^ media
- -> Pandoc -- ^ document
- -> m (Either ByteString ByteString)
-makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do
- let mathArgs = case writerHTMLMathMethod opts of
- -- with MathJax, wait til all math is rendered:
- MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
- "--window-status", "mathjax_loaded"]
- _ -> []
- meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
- let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
- let args = mathArgs ++
- concatMap toArgs
- [("page-size", getField "papersize" meta')
- ,("title", getField "title" meta')
- ,("margin-bottom", fromMaybe (Just "1.2in")
- (getField "margin-bottom" meta'))
- ,("margin-top", fromMaybe (Just "1.25in")
- (getField "margin-top" meta'))
- ,("margin-right", fromMaybe (Just "1.25in")
- (getField "margin-right" meta'))
- ,("margin-left", fromMaybe (Just "1.25in")
- (getField "margin-left" meta'))
- ]
- source <- runIOorExplode $ writer opts doc
- html2pdf verbosity args source
-makePDF program writer opts verbosity mediabag doc =
- liftIO $ withTempDir "tex2pdf." $ \tmpdir -> do
- doc' <- handleImages opts mediabag tmpdir doc
- source <- runIOorExplode $ writer opts doc'
- let args = writerLaTeXArgs opts
- case takeBaseName program of
- "context" -> context2pdf verbosity tmpdir source
- prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
- -> tex2pdf' verbosity args tmpdir program source
- _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
-
-handleImages :: WriterOptions
- -> MediaBag
- -> FilePath -- ^ temp dir to store images
- -> Pandoc -- ^ document
- -> IO Pandoc
-handleImages opts mediabag tmpdir =
- walkM (convertImages tmpdir) <=< walkM (handleImage' opts mediabag tmpdir)
-
-handleImage' :: WriterOptions
- -> MediaBag
- -> FilePath
- -> Inline
- -> IO Inline
-handleImage' opts mediabag tmpdir (Image attr ils (src,tit)) = do
- exists <- doesFileExist src
- if exists
- then return $ Image attr ils (src,tit)
- else do
- res <- runIO $ do
- setMediaBag mediabag
- fetchItem (writerSourceURL opts) src
- case res of
- Right (contents, Just mime) -> do
- let ext = fromMaybe (takeExtension src) $
- extensionFromMimeType mime
- let basename = showDigest $ sha1 $ BL.fromChunks [contents]
- let fname = tmpdir </> basename <.> ext
- BS.writeFile fname contents
- return $ Image attr ils (fname,tit)
- _ -> do
- warn $ "Could not find image `" ++ src ++ "', skipping..."
- -- return alt text
- return $ Emph ils
-handleImage' _ _ _ x = return x
-
-convertImages :: FilePath -> Inline -> IO Inline
-convertImages tmpdir (Image attr ils (src, tit)) = do
- img <- convertImage tmpdir src
- newPath <-
- case img of
- Left e -> src <$ warn e
- Right fp -> return fp
- return (Image attr ils (newPath, tit))
-convertImages _ x = return x
-
--- Convert formats which do not work well in pdf to png
-convertImage :: FilePath -> FilePath -> IO (Either String FilePath)
-convertImage tmpdir fname =
- case mime of
- Just "image/png" -> doNothing
- Just "image/jpeg" -> doNothing
- Just "application/pdf" -> doNothing
- _ -> JP.readImage fname >>= \res ->
- case res of
- Left _ -> return $ Left $ "Unable to convert `" ++
- fname ++ "' for use with pdflatex."
- Right img ->
- E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
- \(e :: E.SomeException) -> return (Left (show e))
- where
- fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir
- mime = getMimeType fname
- doNothing = return (Right fname)
-
-tex2pdf' :: Verbosity -- ^ Verbosity level
- -> [String] -- ^ Arguments to the latex-engine
- -> FilePath -- ^ temp directory for output
- -> String -- ^ tex program
- -> String -- ^ tex source
- -> IO (Either ByteString ByteString)
-tex2pdf' verbosity args tmpDir program source = do
- let numruns = if "\\tableofcontents" `isInfixOf` source
- then 3 -- to get page numbers
- else 2 -- 1 run won't give you PDF bookmarks
- (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source
- case (exit, mbPdf) of
- (ExitFailure _, _) -> do
- let logmsg = extractMsg log'
- let extramsg =
- case logmsg of
- x | "! Package inputenc Error" `BC.isPrefixOf` x
- && program /= "xelatex"
- -> "\nTry running pandoc with --latex-engine=xelatex."
- _ -> ""
- return $ Left $ logmsg <> extramsg
- (ExitSuccess, Nothing) -> return $ Left ""
- (ExitSuccess, Just pdf) -> return $ Right pdf
-
--- parsing output
-
-extractMsg :: ByteString -> ByteString
-extractMsg log' = do
- let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
- let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
- let lineno = take 1 rest
- if null msg'
- then log'
- else BC.unlines (msg'' ++ lineno)
-
-extractConTeXtMsg :: ByteString -> ByteString
-extractConTeXtMsg log' = do
- let msg' = take 1 $
- dropWhile (not . ("tex error" `BC.isPrefixOf`)) $ BC.lines log'
- if null msg'
- then log'
- else BC.unlines msg'
-
--- running tex programs
-
--- Run a TeX program on an input bytestring and return (exit code,
--- contents of stdout, contents of produced PDF if any). Rerun
--- a fixed number of times to resolve references.
-runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath
- -> String -> IO (ExitCode, ByteString, Maybe ByteString)
-runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
- let file = tmpDir </> "input.tex"
- exists <- doesFileExist file
- unless exists $ UTF8.writeFile file source
-#ifdef _WINDOWS
- -- note: we want / even on Windows, for TexLive
- let tmpDir' = changePathSeparators tmpDir
- let file' = changePathSeparators file
-#else
- let tmpDir' = tmpDir
- let file' = file
-#endif
- let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", tmpDir'] ++ args ++ [file']
- env' <- getEnvironment
- let sep = [searchPathSeparator]
- let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
- $ lookup "TEXINPUTS" env'
- let env'' = ("TEXINPUTS", texinputs) :
- [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
- when (verbosity >= INFO && runNumber == 1) $ do
- putStrLn "[makePDF] temp dir:"
- putStrLn tmpDir'
- putStrLn "[makePDF] Command line:"
- putStrLn $ program ++ " " ++ unwords (map show programArgs)
- putStr "\n"
- putStrLn "[makePDF] Environment:"
- mapM_ print env''
- putStr "\n"
- putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
- B.readFile file' >>= B.putStr
- putStr "\n"
- (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty
- when (verbosity >= INFO) $ do
- putStrLn $ "[makePDF] Run #" ++ show runNumber
- B.hPutStr stdout out
- putStr "\n"
- if runNumber <= numRuns
- then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
- else do
- let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
- pdfExists <- doesFileExist pdfFile
- pdf <- if pdfExists
- -- We read PDF as a strict bytestring to make sure that the
- -- temp directory is removed on Windows.
- -- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
- else return Nothing
- return (exit, out, pdf)
-
-html2pdf :: Verbosity -- ^ Verbosity level
- -> [String] -- ^ Args to wkhtmltopdf
- -> String -- ^ HTML5 source
- -> IO (Either ByteString ByteString)
-html2pdf verbosity args source = do
- file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
- pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
- UTF8.writeFile file source
- let programArgs = args ++ [file, pdfFile]
- env' <- getEnvironment
- when (verbosity >= INFO) $ do
- putStrLn "[makePDF] Command line:"
- putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
- putStr "\n"
- putStrLn "[makePDF] Environment:"
- mapM_ print env'
- putStr "\n"
- putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
- putStr "\n"
- (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty
- removeFile file
- when (verbosity >= INFO) $ do
- B.hPutStr stdout out
- putStr "\n"
- pdfExists <- doesFileExist pdfFile
- mbPdf <- if pdfExists
- -- We read PDF as a strict bytestring to make sure that the
- -- temp directory is removed on Windows.
- -- See https://github.com/jgm/pandoc/issues/1192.
- then do
- res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
- removeFile pdfFile
- return res
- else return Nothing
- return $ case (exit, mbPdf) of
- (ExitFailure _, _) -> Left out
- (ExitSuccess, Nothing) -> Left ""
- (ExitSuccess, Just pdf) -> Right pdf
-
-context2pdf :: Verbosity -- ^ Verbosity level
- -> FilePath -- ^ temp directory for output
- -> String -- ^ ConTeXt source
- -> IO (Either ByteString ByteString)
-context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
- let file = "input.tex"
- UTF8.writeFile file source
-#ifdef _WINDOWS
- -- note: we want / even on Windows, for TexLive
- let tmpDir' = changePathSeparators tmpDir
-#else
- let tmpDir' = tmpDir
-#endif
- let programArgs = "--batchmode" : [file]
- env' <- getEnvironment
- let sep = [searchPathSeparator]
- let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++)
- $ lookup "TEXINPUTS" env'
- let env'' = ("TEXINPUTS", texinputs) :
- [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
- when (verbosity >= INFO) $ do
- putStrLn "[makePDF] temp dir:"
- putStrLn tmpDir'
- putStrLn "[makePDF] Command line:"
- putStrLn $ "context" ++ " " ++ unwords (map show programArgs)
- putStr "\n"
- putStrLn "[makePDF] Environment:"
- mapM_ print env''
- putStr "\n"
- putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
- putStr "\n"
- (exit, out) <- pipeProcess (Just env'') "context" programArgs BL.empty
- when (verbosity >= INFO) $ do
- B.hPutStr stdout out
- putStr "\n"
- let pdfFile = replaceExtension file ".pdf"
- pdfExists <- doesFileExist pdfFile
- mbPdf <- if pdfExists
- -- We read PDF as a strict bytestring to make sure that the
- -- temp directory is removed on Windows.
- -- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
- else return Nothing
- case (exit, mbPdf) of
- (ExitFailure _, _) -> do
- let logmsg = extractConTeXtMsg out
- return $ Left logmsg
- (ExitSuccess, Nothing) -> return $ Left ""
- (ExitSuccess, Just pdf) -> return $ Right pdf
-
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
deleted file mode 100644
index 400d07f2a..000000000
--- a/src/Text/Pandoc/Parsing.hs
+++ /dev/null
@@ -1,1329 +0,0 @@
-{-# LANGUAGE
- FlexibleContexts
-, GeneralizedNewtypeDeriving
-, TypeSynonymInstances
-, MultiParamTypeClasses
-, FlexibleInstances
-, IncoherentInstances #-}
-
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Parsing
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-A utility library with parsers used in pandoc readers.
--}
-module Text.Pandoc.Parsing ( anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- oneOfStringsCI,
- spaceChar,
- nonspaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- romanNumeral,
- emailAddress,
- uri,
- mathInline,
- mathDisplay,
- withHorizDisplacement,
- withRaw,
- escaped,
- characterReference,
- anyOrderedListMarker,
- orderedListMarker,
- charRef,
- lineBlockLines,
- tableWith,
- widthsFromIndices,
- gridTableWith,
- readWith,
- readWithM,
- testStringWith,
- guardEnabled,
- guardDisabled,
- updateLastStrPos,
- notAfterString,
- logMessage,
- reportLogMessages,
- ParserState (..),
- HasReaderOptions (..),
- HasHeaderMap (..),
- HasIdentifierList (..),
- HasMacros (..),
- HasLogMessages (..),
- HasLastStrPosition (..),
- defaultParserState,
- HeaderType (..),
- ParserContext (..),
- QuoteContext (..),
- HasQuoteContext (..),
- NoteTable,
- NoteTable',
- KeyTable,
- SubstTable,
- Key (..),
- toKey,
- registerHeader,
- smartPunctuation,
- singleQuoteStart,
- singleQuoteEnd,
- doubleQuoteStart,
- doubleQuoteEnd,
- ellipses,
- apostrophe,
- dash,
- nested,
- citeKey,
- macro,
- applyMacros',
- Parser,
- ParserT,
- F(..),
- runF,
- askF,
- asksF,
- token,
- (<+?>),
- extractIdClass,
- insertIncludedFile,
- -- * Re-exports from Text.Pandoc.Parsec
- Stream,
- runParser,
- runParserT,
- parse,
- anyToken,
- getInput,
- setInput,
- unexpected,
- char,
- letter,
- digit,
- alphaNum,
- skipMany,
- skipMany1,
- spaces,
- space,
- anyChar,
- satisfy,
- newline,
- string,
- count,
- eof,
- noneOf,
- oneOf,
- lookAhead,
- notFollowedBy,
- many,
- many1,
- manyTill,
- (<|>),
- (<?>),
- choice,
- try,
- sepBy,
- sepBy1,
- sepEndBy,
- sepEndBy1,
- endBy,
- endBy1,
- option,
- optional,
- optionMaybe,
- getState,
- setState,
- updateState,
- SourcePos,
- getPosition,
- setPosition,
- sourceColumn,
- sourceLine,
- setSourceColumn,
- setSourceLine,
- newPos
- )
-where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.XML (fromEntities)
-import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
- isHexDigit, isSpace, isPunctuation )
-import Data.List ( intercalate, transpose, isSuffixOf )
-import Text.Pandoc.Shared
-import qualified Data.Map as M
-import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
- parseMacroDefinitions)
-import Text.HTML.TagSoup.Entity ( lookupEntity )
-import Text.Pandoc.Asciify (toAsciiChar)
-import Data.Monoid ((<>))
-import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report)
-import Text.Pandoc.Logging
-import Data.Default
-import qualified Data.Set as Set
-import Control.Monad.Reader
-import Control.Monad.Identity
-import Data.Maybe (catMaybes)
-
-import Text.Pandoc.Error
-import Control.Monad.Except
-
-type Parser t s = Parsec t s
-
-type ParserT = ParsecT
-
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> ParserState -> a
-runF = runReader . unF
-
-askF :: F ParserState
-askF = F ask
-
-asksF :: (ParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = liftM mconcat . sequence
-
--- | Parse any line of text
-anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
-anyLine = do
- -- This is much faster than:
- -- manyTill anyChar newline
- inp <- getInput
- pos <- getPosition
- case break (=='\n') inp of
- (this, '\n':rest) -> do
- -- needed to persuade parsec that this won't match an empty string:
- anyChar
- setInput rest
- setPosition $ incSourceLine (setSourceColumn pos 1) 1
- return this
- _ -> mzero
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: Stream s m t
- => ParserT s st m a
- -> ParserT s st m end
- -> ParserT s st m [a]
-many1Till p end = do
- first <- p
- rest <- manyTill p end
- return (first:rest)
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
-notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
-notFollowedBy' p = try $ join $ do a <- try p
- return (unexpected (show a))
- <|>
- return (return ())
--- (This version due to Andrew Pimlott on the Haskell mailing list.)
-
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
-oneOfStrings' _ [] = fail "no strings"
-oneOfStrings' matches strs = try $ do
- c <- anyChar
- let strs' = [xs | (x:xs) <- strs, x `matches` c]
- case strs' of
- [] -> fail "not found"
- _ -> (c:) <$> oneOfStrings' matches strs'
- <|> if "" `elem` strs'
- then return [c]
- else fail "not found"
-
--- | Parses one of a list of strings. If the list contains
--- two strings one of which is a prefix of the other, the longer
--- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
-oneOfStrings = oneOfStrings' (==)
-
--- | Parses one of a list of strings (tried in order), case insensitive.
-oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
-oneOfStringsCI = oneOfStrings' ciMatch
- where ciMatch x y = toLower' x == toLower' y
- -- this optimizes toLower by checking common ASCII case
- -- first, before calling the expensive unicode-aware
- -- function:
- toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
- | isAscii c = c
- | otherwise = toLower c
-
--- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
-spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-
--- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
-nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
-blankline = try $ skipSpaces >> newline
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m [Char]
-blanklines = many1 blankline
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
- -> ParserT s st m end -- ^ end parser
- -> ParserT s st m a -- ^ content parser (to be used repeatedly)
- -> ParserT s st m [a]
-enclosed start end parser = try $
- start >> notFollowedBy space >> many1Till parser end
-
--- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = do
- firstChar <- char (toUpper x) <|> char (toLower x)
- rest <- stringAnyCase xs
- return (firstChar:rest)
-
--- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
-parseFromString parser str = do
- oldPos <- getPosition
- oldInput <- getInput
- setInput str
- result <- parser
- spaces
- eof
- setInput oldInput
- setPosition oldPos
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: Stream [Char] m Char => ParserT [Char] st m String
-lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
--- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
- -> ParserT s st m String
-charsInBalanced open close parser = try $ do
- char open
- let isDelim c = c == open || c == close
- raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser)
- <|> (do res <- charsInBalanced open close parser
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
-
--- old charsInBalanced would be:
--- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
--- old charsInBalanced' would be:
--- charsInBalanced open close anyChar
-
--- Auxiliary functions for romanNumeral:
-
-lowercaseRomanDigits :: [Char]
-lowercaseRomanDigits = ['i','v','x','l','c','d','m']
-
-uppercaseRomanDigits :: [Char]
-uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
- -> ParserT s st m Int
-romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
- else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
- map char romanDigits
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- Parsers for email addresses and URIs
-
--- | Parses an email address; returns original and corresponding
--- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (String, String)
-emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
- where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
- in (full, escapeURI $ "mailto:" ++ full)
- mailbox = intercalate "." <$> (emailWord `sepby1` dot)
- domain = intercalate "." <$> (subdomain `sepby1` dot)
- dot = char '.'
- subdomain = many1 $ alphaNum <|> innerPunct
- -- this excludes some valid email addresses, since an
- -- email could contain e.g. '__', but gives better results
- -- for our purposes, when combined with markdown parsing:
- innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
- <* notFollowedBy space
- <* notFollowedBy (satisfy isPunctuation))
- -- technically an email address could begin with a symbol,
- -- but allowing this creates too many problems.
- -- See e.g. https://github.com/jgm/pandoc/issues/2940
- emailWord = do x <- satisfy isAlphaNum
- xs <- many (satisfy isEmailChar)
- return (x:xs)
- isEmailChar c = isAlphaNum c || isEmailPunct c
- isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
- -- note: sepBy1 from parsec consumes input when sep
- -- succeeds and p fails, so we use this variant here.
- sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
-
-
--- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript, isbn, pmid
-schemes :: [String]
-schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
- "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
- "h323","http","https","iax","icap","im","imap","info","ipp","iris",
- "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
- "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
- "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
- "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
- "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
- "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
- "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
- "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
- "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
- "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
- "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
- "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
- "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
- "platform","proxy","psyc","query","res","resource","rmi","rsync",
- "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
- "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
- "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr", "isbn", "pmid"]
-
-uriScheme :: Stream s m Char => ParserT s st m String
-uriScheme = oneOfStringsCI schemes
-
--- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
-uri = try $ do
- scheme <- uriScheme
- char ':'
- -- We allow sentence punctuation except at the end, since
- -- we don't want the trailing '.' in 'http://google.com.' We want to allow
- -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
- -- as a URL, while NOT picking up the closing paren in
- -- (http://wikipedia.org). So we include balanced parens in the URL.
- let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
- let wordChar = satisfy isWordChar
- let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
- let entity = () <$ characterReference
- let punct = skipMany1 (char ',')
- <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
- let uriChunk = skipMany1 wordChar
- <|> percentEscaped
- <|> entity
- <|> (try $ punct >>
- lookAhead (void (satisfy isWordChar) <|> percentEscaped))
- str <- snd <$> withRaw (skipMany1 ( () <$
- (enclosed (char '(') (char ')') uriChunk
- <|> enclosed (char '{') (char '}') uriChunk
- <|> enclosed (char '[') (char ']') uriChunk)
- <|> uriChunk))
- str' <- option str $ char '/' >> return (str ++ "/")
- let uri' = scheme ++ ":" ++ fromEntities str'
- return (uri', escapeURI uri')
-
-mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
-mathInlineWith op cl = try $ do
- string op
- notFollowedBy space
- words' <- many1Till (count 1 (noneOf " \t\n\\")
- <|> (char '\\' >>
- -- This next clause is needed because \text{..} can
- -- contain $, \(\), etc.
- (try (string "text" >>
- (("\\text" ++) <$> inBalancedBraces 0 ""))
- <|> (\c -> ['\\',c]) <$> anyChar))
- <|> do (blankline <* notFollowedBy' blankline) <|>
- (oneOf " \t" <* skipMany (oneOf " \t"))
- notFollowedBy (char '$')
- return " "
- ) (try $ string cl)
- notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
- where
- inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
- inBalancedBraces 0 "" = do
- c <- anyChar
- if c == '{'
- then inBalancedBraces 1 "{"
- else mzero
- inBalancedBraces 0 s = return $ reverse s
- inBalancedBraces numOpen ('\\':xs) = do
- c <- anyChar
- inBalancedBraces numOpen (c:'\\':xs)
- inBalancedBraces numOpen xs = do
- c <- anyChar
- case c of
- '}' -> inBalancedBraces (numOpen - 1) (c:xs)
- '{' -> inBalancedBraces (numOpen + 1) (c:xs)
- _ -> inBalancedBraces numOpen (c:xs)
-
-mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
-mathDisplayWith op cl = try $ do
- string op
- many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
- => ParserT s st m String
-mathDisplay =
- (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathDisplayWith "\\[" "\\]")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathDisplayWith "\\\\[" "\\\\]")
-
-mathInline :: (HasReaderOptions st , Stream s m Char)
- => ParserT s st m String
-mathInline =
- (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathInlineWith "\\(" "\\)")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathInlineWith "\\\\(" "\\\\)")
-
--- | Applies a parser, returns tuple of its results and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
- => ParserT s st m a -- ^ Parser to apply
- -> ParserT s st m (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
--- | Applies a parser and returns the raw string that was parsed,
--- along with the value produced by the parser.
-withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
-withRaw parser = do
- pos1 <- getPosition
- inp <- getInput
- result <- parser
- pos2 <- getPosition
- let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
- let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
- let inplines = take ((l2 - l1) + 1) $ lines inp
- let raw = case inplines of
- [] -> ""
- [l] -> take (c2 - c1) l
- ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
- return (result, raw)
-
--- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
- => ParserT s st m Char -- ^ Parser for character to escape
- -> ParserT s st m Char
-escaped parser = try $ char '\\' >> parser
-
--- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
-characterReference = try $ do
- char '&'
- ent <- many1Till nonspaceChar (char ';')
- let ent' = case ent of
- '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
- case lookupEntity ent' of
- Just (c : _) -> return c
- _ -> fail "entity not found"
-
--- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
-
--- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
-
--- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-decimal = do
- num <- many1 digit
- return (Decimal, read num)
-
--- | Parses a '@' and optional label and
--- returns (DefaultStyle, [next example number]). The next
--- example number is incremented in parser state, and the label
--- (if present) is added to the label table.
-exampleNum :: Stream s m Char
- => ParserT s ParserState m (ListNumberStyle, Int)
-exampleNum = do
- char '@'
- lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
- st <- getState
- let num = stateNextExample st
- let newlabels = if null lab
- then stateExamples st
- else M.insert lab num $ stateExamples st
- updateState $ \s -> s{ stateNextExample = num + 1
- , stateExamples = newlabels }
- return (Example, num)
-
--- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-defaultNum = do
- char '#'
- return (DefaultStyle, 1)
-
--- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
-
--- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
-
--- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
-romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
- (char 'I' >> return (UpperRoman, 1))
-
--- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
-anyOrderedListMarker = choice $
- [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, exampleNum, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-
--- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
-
--- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
-
--- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
- => ParserT s st m (ListNumberStyle, Int)
- -> ParserT s st m ListAttributes
-inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
-
--- | Parses an ordered list marker with a given style and delimiter,
--- returns number.
-orderedListMarker :: Stream s m Char
- => ListNumberStyle
- -> ListNumberDelim
- -> ParserT s ParserState m Int
-orderedListMarker style delim = do
- let num = defaultNum <|> -- # can continue any kind of list
- case style of
- DefaultStyle -> decimal
- Example -> exampleNum
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, _, _) <- context num
- return start
-
--- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
-charRef = do
- c <- characterReference
- return $ Str [c]
-
-lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
-lineBlockLine = try $ do
- char '|'
- char ' '
- white <- many (spaceChar >> return '\160')
- notFollowedBy newline
- line <- anyLine
- continuations <- many (try $ char ' ' >> anyLine)
- return $ white ++ unwords (line : continuations)
-
-blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char
-blankLineBlockLine = try (char '|' >> blankline)
-
--- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
-lineBlockLines = try $ do
- lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine))
- skipMany1 $ blankline <|> blankLineBlockLine
- return lines'
-
--- | Parse a table using 'headerParser', 'rowParser',
--- 'lineParser', and 'footerParser'.
-tableWith :: Stream s m Char
- => ParserT s ParserState m ([Blocks], [Alignment], [Int])
- -> ([Int] -> ParserT s ParserState m [Blocks])
- -> ParserT s ParserState m sep
- -> ParserT s ParserState m end
- -> ParserT s ParserState m Blocks
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy1` lineParser
- footerParser
- numColumns <- getOption readerColumns
- let widths = if (indices == [])
- then replicate (length aligns) 0.0
- else widthsFromIndices numColumns indices
- return $ B.table mempty (zip aligns widths) heads lines'
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns' indices =
- let numColumns = max numColumns' (if null indices then 0 else last indices)
- lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
----
-
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks -- ^ Block list parser
- -> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
-gridTableWith blocks headless =
- tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: Stream [Char] m Char
- => Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
- -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int])
-gridTableHeader headless blocks = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >>
- many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault
- -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString blocks) $ map trim rawHeads
- return (heads, aligns, indices)
-
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks
- -> [Int]
- -> ParserT [Char] ParserState m [Blocks]
-gridTableRow blocks indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString blocks) cols
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
-compactifyCell :: Blocks -> Blocks
-compactifyCell bs = head $ compactify [bs]
-
--- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
-gridTableFooter = blanklines
-
----
-
--- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Monad m)
- => ParserT [Char] st m a -- ^ parser
- -> st -- ^ initial state
- -> String -- ^ input
- -> m (Either PandocError a)
-readWithM parser state input =
- mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input
-
-
--- | Parse a string with a given parser and state
-readWith :: Parser [Char] st a
- -> st
- -> String
- -> Either PandocError a
-readWith p t inp = runIdentity $ readWithM p t inp
-
--- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a)
- => ParserT [Char] ParserState Identity a
- -> [Char]
- -> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
- readWith parser defaultParserState str
-
--- | Parsing options.
-data ParserState = ParserState
- { stateOptions :: ReaderOptions, -- ^ User options
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateAllowLinks :: Bool, -- ^ Allow parsing of links
- stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
- stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
- stateKeys :: KeyTable, -- ^ List of reference keys
- stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
- stateSubstitutions :: SubstTable, -- ^ List of substitution references
- stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
- stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
- stateMeta :: Meta, -- ^ Document metadata
- stateMeta' :: F Meta, -- ^ Document metadata
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
- stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
- stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateHasChapters :: Bool, -- ^ True if \chapter encountered
- stateMacros :: [Macro], -- ^ List of macros defined so far
- stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
- stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
- -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
- -- roles), 3) Additional classes (rest of Attr is unused)).
- stateCaption :: Maybe Inlines, -- ^ Caption in current environment
- stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
- stateContainers :: [String], -- ^ parent include files
- stateLogMessages :: [LogMessage], -- ^ log messages
- stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context
- }
-
-instance Default ParserState where
- def = defaultParserState
-
-instance HasMeta ParserState where
- setMeta field val st =
- st{ stateMeta = setMeta field val $ stateMeta st }
- deleteMeta field st =
- st{ stateMeta = deleteMeta field $ stateMeta st }
-
-class HasReaderOptions st where
- extractReaderOptions :: st -> ReaderOptions
- getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
- -- default
- getOption f = (f . extractReaderOptions) <$> getState
-
-class HasQuoteContext st m where
- getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
- withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
-
-instance Monad m => HasQuoteContext ParserState m where
- getQuoteContext = stateQuoteContext <$> getState
- withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-instance HasReaderOptions ParserState where
- extractReaderOptions = stateOptions
-
-class HasHeaderMap st where
- extractHeaderMap :: st -> M.Map Inlines String
- updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
- st -> st
-
-instance HasHeaderMap ParserState where
- extractHeaderMap = stateHeaders
- updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
-
-class HasIdentifierList st where
- extractIdentifierList :: st -> Set.Set String
- updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st
-
-instance HasIdentifierList ParserState where
- extractIdentifierList = stateIdentifiers
- updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
-
-class HasMacros st where
- extractMacros :: st -> [Macro]
- updateMacros :: ([Macro] -> [Macro]) -> st -> st
-
-instance HasMacros ParserState where
- extractMacros = stateMacros
- updateMacros f st = st{ stateMacros = f $ stateMacros st }
-
-class HasLastStrPosition st where
- setLastStrPos :: SourcePos -> st -> st
- getLastStrPos :: st -> Maybe SourcePos
-
-instance HasLastStrPosition ParserState where
- setLastStrPos pos st = st{ stateLastStrPos = Just pos }
- getLastStrPos st = stateLastStrPos st
-
-class HasLogMessages st where
- addLogMessage :: LogMessage -> st -> st
- getLogMessages :: st -> [LogMessage]
-
-instance HasLogMessages ParserState where
- addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
- getLogMessages st = reverse $ stateLogMessages st
-
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateOptions = def,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateAllowLinks = True,
- stateMaxNestingLevel = 6,
- stateLastStrPos = Nothing,
- stateKeys = M.empty,
- stateHeaderKeys = M.empty,
- stateSubstitutions = M.empty,
- stateNotes = [],
- stateNotes' = [],
- stateMeta = nullMeta,
- stateMeta' = return nullMeta,
- stateHeaderTable = [],
- stateHeaders = M.empty,
- stateIdentifiers = Set.empty,
- stateNextExample = 1,
- stateExamples = M.empty,
- stateHasChapters = False,
- stateMacros = [],
- stateRstDefaultRole = "title-reference",
- stateRstCustomRoles = M.empty,
- stateCaption = Nothing,
- stateInHtmlBlock = Nothing,
- stateContainers = [],
- stateLogMessages = [],
- stateMarkdownAttribute = False
- }
-
--- | Add a log message.
-logMessage :: (Stream s m a, HasLogMessages st)
- => LogMessage -> ParserT s st m ()
-logMessage msg = updateState (addLogMessage msg)
-
--- | Report all the accumulated log messages, according to verbosity level.
-reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
-reportLogMessages = do
- msgs <- getLogMessages <$> getState
- mapM_ report msgs
-
--- | Succeed only if the extension is enabled.
-guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext
-
--- | Succeed only if the extension is disabled.
-guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
-guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext
-
--- | Update the position on which the last string ended.
-updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
-updateLastStrPos = getPosition >>= updateState . setLastStrPos
-
--- | Whether we are right after the end of a string.
-notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
-notAfterString = do
- pos <- getPosition
- st <- getState
- return $ getLastStrPos st /= Just pos
-
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
-
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
-
-data QuoteContext
- = InSingleQuote -- ^ Used when parsing inside single quotes
- | InDoubleQuote -- ^ Used when parsing inside double quotes
- | NoQuote -- ^ Used when not parsing inside quotes
- deriving (Eq, Show)
-
-type NoteTable = [(String, String)]
-
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
-
-newtype Key = Key String deriving (Show, Read, Eq, Ord)
-
-toKey :: String -> Key
-toKey = Key . map toLower . unwords . words . unbracket
- where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs
- unbracket xs = xs
-
-type KeyTable = M.Map Key (Target, Attr)
-
-type SubstTable = M.Map Key Inlines
-
--- | Add header to the list of headers in state, together
--- with its associated identifier. If the identifier is null
--- and the auto_identifers extension is set, generate a new
--- unique identifier, and update the list of identifiers
--- in state.
-registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
- => Attr -> Inlines -> ParserT s st m Attr
-registerHeader (ident,classes,kvs) header' = do
- ids <- extractIdentifierList <$> getState
- exts <- getOption readerExtensions
- let insert' = M.insertWith (\_new old -> old)
- if null ident && Ext_auto_identifiers `extensionEnabled` exts
- then do
- let id' = uniqueIdent (B.toList header') ids
- let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
- then catMaybes $ map toAsciiChar id'
- else id'
- updateState $ updateIdentifierList $ Set.insert id'
- updateState $ updateIdentifierList $ Set.insert id''
- updateState $ updateHeaderMap $ insert' header' id'
- return (id'',classes,kvs)
- else do
- unless (null ident) $
- updateState $ updateHeaderMap $ insert' header' ident
- return (ident,classes,kvs)
-
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-smartPunctuation inlineParser = do
- guardEnabled Ext_smart
- choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
- return . B.singleQuoted . mconcat
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
- => ParserT s st m Inlines
- -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
- return . B.doubleQuoted . mconcat
-
-failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
- => QuoteContext
- -> ParserT s st m ()
-failIfInQuoteContext context = do
- context' <- getQuoteContext
- if context' == context
- then fail "already inside quotes"
- else return ()
-
-charOrRef :: Stream s m Char => String -> ParserT s st m Char
-charOrRef cs =
- oneOf cs <|> try (do c <- characterReference
- guard (c `elem` cs)
- return c)
-
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
- => ParserT s st m ()
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- -- single quote start can't be right after str
- guard =<< notAfterString
- () <$ charOrRef "'\8216\145"
-
-singleQuoteEnd :: Stream s m Char
- => ParserT s st m ()
-singleQuoteEnd = try $ do
- charOrRef "'\8217\146"
- notFollowedBy alphaNum
-
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
- => ParserT s st m ()
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- try $ do charOrRef "\"\8220\147"
- notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
-
-doubleQuoteEnd :: Stream s m Char
- => ParserT s st m ()
-doubleQuoteEnd = void (charOrRef "\"\8221\148")
-
-ellipses :: Stream s m Char
- => ParserT s st m Inlines
-ellipses = try (string "..." >> return (B.str "\8230"))
-
-dash :: (HasReaderOptions st, Stream s m Char)
- => ParserT s st m Inlines
-dash = try $ do
- oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
- if oldDashes
- then do
- char '-'
- (char '-' >> return (B.str "\8212"))
- <|> (lookAhead digit >> return (B.str "\8211"))
- else do
- string "--"
- (char '-' >> return (B.str "\8212"))
- <|> return (B.str "\8211")
-
--- This is used to prevent exponential blowups for things like:
--- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: Stream s m a
- => ParserT s ParserState m a
- -> ParserT s ParserState m a
-nested p = do
- nestlevel <- stateMaxNestingLevel <$> getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
- return res
-
-citeKey :: (Stream s m Char, HasLastStrPosition st)
- => ParserT s st m (Bool, String)
-citeKey = try $ do
- guard =<< notAfterString
- suppress_author <- option False (char '-' *> return True)
- char '@'
- firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
- let regchar = satisfy (\c -> isAlphaNum c || c == '_')
- let internal p = try $ p <* lookAhead regchar
- rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
- try (oneOf ":/" <* lookAhead (char '/'))
- let key = firstChar:rest
- return (suppress_author, key)
-
-
-token :: (Stream s m t)
- => (t -> String)
- -> (t -> SourcePos)
- -> (t -> Maybe a)
- -> ParsecT s st m a
-token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
-
---
--- Macros
---
-
--- | Parse a \newcommand or \renewcommand macro definition.
-macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
- => ParserT [Char] st m Blocks
-macro = do
- apply <- getOption readerApplyMacros
- inp <- getInput
- case parseMacroDefinitions inp of
- ([], _) -> mzero
- (ms, rest) -> do def' <- count (length inp - length rest) anyChar
- if apply
- then do
- updateState $ \st ->
- updateMacros (ms ++) st
- return mempty
- else return $ rawBlock "latex" def'
-
--- | Apply current macros to string.
-applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
- => String
- -> ParserT [Char] st m String
-applyMacros' target = do
- apply <- getOption readerApplyMacros
- if apply
- then do macros <- extractMacros <$> getState
- return $ applyMacros macros target
- else return target
-
-infixr 5 <+?>
-(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
-a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
-
-extractIdClass :: Attr -> Attr
-extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
- where
- ident' = case (lookup "id" kvs) of
- Just v -> v
- Nothing -> ident
- cls' = case (lookup "class" kvs) of
- Just cl -> words cl
- Nothing -> cls
- kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs
-
-insertIncludedFile :: PandocMonad m
- => ParserT String ParserState m Blocks
- -> [FilePath] -> FilePath
- -> ParserT String ParserState m Blocks
-insertIncludedFile blocks dirs f = do
- oldPos <- getPosition
- oldInput <- getInput
- containers <- stateContainers <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
- updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbcontents <- readFileFromDirs dirs f
- contents <- case mbcontents of
- Just s -> return s
- Nothing -> do
- report $ CouldNotLoadIncludeFile f oldPos
- return ""
- setPosition $ newPos f 1 1
- setInput contents
- bs <- blocks
- setInput oldInput
- setPosition oldPos
- updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
- return bs
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
deleted file mode 100644
index 256f38b0c..000000000
--- a/src/Text/Pandoc/Pretty.hs
+++ /dev/null
@@ -1,557 +0,0 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
-{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA
--}
-
-{- |
- Module : Text.Pandoc.Pretty
- Copyright : Copyright (C) 2010-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-A prettyprinting library for the production of text documents,
-including wrapped text, indentated blocks, and tables.
--}
-
-module Text.Pandoc.Pretty (
- Doc
- , render
- , cr
- , blankline
- , blanklines
- , space
- , text
- , char
- , prefixed
- , flush
- , nest
- , hang
- , beforeNonBlank
- , nowrap
- , afterBreak
- , offset
- , minOffset
- , height
- , lblock
- , cblock
- , rblock
- , (<>)
- , (<+>)
- , ($$)
- , ($+$)
- , isEmpty
- , empty
- , cat
- , hcat
- , hsep
- , vcat
- , vsep
- , nestle
- , chomp
- , inside
- , braces
- , brackets
- , parens
- , quotes
- , doubleQuotes
- , charWidth
- , realLength
- )
-
-where
-import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..))
-import qualified Data.Sequence as Seq
-import Data.Foldable (toList)
-import Data.List (intersperse)
-import Data.String
-import Control.Monad.State
-import Data.Char (isSpace)
-import Data.Monoid ((<>))
-
-data RenderState a = RenderState{
- output :: [a] -- ^ In reverse order
- , prefix :: String
- , usePrefix :: Bool
- , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping
- , column :: Int
- , newlines :: Int -- ^ Number of preceding newlines
- }
-
-type DocState a = State (RenderState a) ()
-
-data D = Text Int String
- | Block Int [String]
- | Prefixed String Doc
- | BeforeNonBlank Doc
- | Flush Doc
- | BreakingSpace
- | AfterBreak String
- | CarriageReturn
- | NewLine
- | BlankLines Int -- number of blank lines
- deriving (Show)
-
-newtype Doc = Doc { unDoc :: Seq D }
- deriving (Monoid, Show)
-
-instance IsString Doc where
- fromString = text
-
-isBlank :: D -> Bool
-isBlank BreakingSpace = True
-isBlank CarriageReturn = True
-isBlank NewLine = True
-isBlank (BlankLines _) = True
-isBlank (Text _ (c:_)) = isSpace c
-isBlank _ = False
-
--- | True if the document is empty.
-isEmpty :: Doc -> Bool
-isEmpty = Seq.null . unDoc
-
--- | The empty document.
-empty :: Doc
-empty = mempty
-
--- | Concatenate a list of 'Doc's.
-cat :: [Doc] -> Doc
-cat = mconcat
-
--- | Same as 'cat'.
-hcat :: [Doc] -> Doc
-hcat = mconcat
-
--- | Concatenate a list of 'Doc's, putting breakable spaces
--- between them.
-infixr 6 <+>
-(<+>) :: Doc -> Doc -> Doc
-(<+>) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> space <> y
-
--- | Same as 'cat', but putting breakable spaces between the
--- 'Doc's.
-hsep :: [Doc] -> Doc
-hsep = foldr (<+>) empty
-
-infixr 5 $$
--- | @a $$ b@ puts @a@ above @b@.
-($$) :: Doc -> Doc -> Doc
-($$) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> cr <> y
-
-infixr 5 $+$
--- | @a $+$ b@ puts @a@ above @b@, with a blank line between.
-($+$) :: Doc -> Doc -> Doc
-($+$) x y = if isEmpty x
- then y
- else if isEmpty y
- then x
- else x <> blankline <> y
-
--- | List version of '$$'.
-vcat :: [Doc] -> Doc
-vcat = foldr ($$) empty
-
--- | List version of '$+$'.
-vsep :: [Doc] -> Doc
-vsep = foldr ($+$) empty
-
--- | Removes leading blank lines from a 'Doc'.
-nestle :: Doc -> Doc
-nestle (Doc d) = Doc $ go d
- where go x = case viewl x of
- (BlankLines _ :< rest) -> go rest
- (NewLine :< rest) -> go rest
- _ -> x
-
--- | Chomps trailing blank space off of a 'Doc'.
-chomp :: Doc -> Doc
-chomp d = Doc (fromList dl')
- where dl = toList (unDoc d)
- dl' = reverse $ go $ reverse dl
- go [] = []
- go (BreakingSpace : xs) = go xs
- go (CarriageReturn : xs) = go xs
- go (NewLine : xs) = go xs
- go (BlankLines _ : xs) = go xs
- go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
- go xs = xs
-
-outp :: (IsString a) => Int -> String -> DocState a
-outp off s | off < 0 = do -- offset < 0 means newline characters
- st' <- get
- let rawpref = prefix st'
- when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
- let pref = reverse $ dropWhile isSpace $ reverse rawpref
- modify $ \st -> st{ output = fromString pref : output st
- , column = column st + realLength pref }
- let numnewlines = length $ takeWhile (=='\n') $ reverse s
- modify $ \st -> st { output = fromString s : output st
- , column = 0
- , newlines = newlines st + numnewlines }
-outp off s = do -- offset >= 0 (0 might be combining char)
- st' <- get
- let pref = prefix st'
- when (column st' == 0 && usePrefix st' && not (null pref)) $ do
- modify $ \st -> st{ output = fromString pref : output st
- , column = column st + realLength pref }
- modify $ \st -> st{ output = fromString s : output st
- , column = column st + off
- , newlines = 0 }
-
--- | Renders a 'Doc'. @render (Just n)@ will use
--- a line length of @n@ to reflow text on breakable spaces.
--- @render Nothing@ will not reflow text.
-render :: (IsString a) => Maybe Int -> Doc -> a
-render linelen doc = fromString . mconcat . reverse . output $
- execState (renderDoc doc) startingState
- where startingState = RenderState{
- output = mempty
- , prefix = ""
- , usePrefix = True
- , lineLength = linelen
- , column = 0
- , newlines = 2 }
-
-renderDoc :: (IsString a, Monoid a)
- => Doc -> DocState a
-renderDoc = renderList . toList . unDoc
-
-data IsBlock = IsBlock Int [String]
-
--- This would be nicer with a pattern synonym
--- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..)
-
-renderList :: (IsString a, Monoid a)
- => [D] -> DocState a
-renderList [] = return ()
-renderList (Text off s : xs) = do
- outp off s
- renderList xs
-
-renderList (Prefixed pref d : xs) = do
- st <- get
- let oldPref = prefix st
- put st{ prefix = prefix st ++ pref }
- renderDoc d
- modify $ \s -> s{ prefix = oldPref }
- renderList xs
-
-renderList (Flush d : xs) = do
- st <- get
- let oldUsePrefix = usePrefix st
- put st{ usePrefix = False }
- renderDoc d
- modify $ \s -> s{ usePrefix = oldUsePrefix }
- renderList xs
-
-renderList (BeforeNonBlank d : xs) =
- case xs of
- (x:_) | isBlank x -> renderList xs
- | otherwise -> renderDoc d >> renderList xs
- [] -> renderList xs
-
-renderList [BlankLines _] = return ()
-
-renderList (BlankLines m : BlankLines n : xs) =
- renderList (BlankLines (max m n) : xs)
-
-renderList (BlankLines num : xs) = do
- st <- get
- case output st of
- _ | newlines st > num -> return ()
- | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
- renderList xs
-
-renderList (CarriageReturn : BlankLines m : xs) =
- renderList (BlankLines m : xs)
-
-renderList (CarriageReturn : xs) = do
- st <- get
- if newlines st > 0 || null xs
- then renderList xs
- else do
- outp (-1) "\n"
- renderList xs
-
-renderList (NewLine : xs) = do
- outp (-1) "\n"
- renderList xs
-
-renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs)
-renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
-renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs)
-renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
-renderList (BreakingSpace : xs) = do
- let isText (Text _ _) = True
- isText (Block _ _) = True
- isText (AfterBreak _) = True
- isText _ = False
- let isBreakingSpace BreakingSpace = True
- isBreakingSpace _ = False
- let xs' = dropWhile isBreakingSpace xs
- let next = takeWhile isText xs'
- st <- get
- let off = sum $ map offsetOf next
- case lineLength st of
- Just l | column st + 1 + off > l -> do
- outp (-1) "\n"
- renderList xs'
- _ -> do
- outp 1 " "
- renderList xs'
-
-renderList (AfterBreak s : xs) = do
- st <- get
- if newlines st > 0
- then outp (realLength s) s
- else return ()
- renderList xs
-
-renderList (Block i1 s1 : Block i2 s2 : xs) =
- renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs)
-
-renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) =
- renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs)
-
-renderList (Block _width lns : xs) = do
- st <- get
- let oldPref = prefix st
- case column st - realLength oldPref of
- n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
- _ -> return ()
- renderList $ intersperse CarriageReturn (map (Text 0) lns)
- modify $ \s -> s{ prefix = oldPref }
- renderList xs
-
-mergeBlocks :: Bool -> IsBlock -> IsBlock -> D
-mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) =
- Block (w1 + w2 + if addSpace then 1 else 0) $
- zipWith (\l1 l2 -> pad w1 l1 ++ l2) lns1' (map sp lns2')
- where (lns1', lns2') = case (length lns1, length lns2) of
- (x, y) | x > y -> (lns1,
- lns2 ++ replicate (x - y) "")
- | x < y -> (lns1 ++ replicate (y - x) "",
- lns2)
- | otherwise -> (lns1, lns2)
- pad n s = s ++ replicate (n - realLength s) ' '
- sp "" = ""
- sp xs = if addSpace then (' ' : xs) else xs
-
-offsetOf :: D -> Int
-offsetOf (Text o _) = o
-offsetOf (Block w _) = w
-offsetOf BreakingSpace = 1
-offsetOf _ = 0
-
--- | A literal string.
-text :: String -> Doc
-text = Doc . toChunks
- where toChunks :: String -> Seq D
- toChunks [] = mempty
- toChunks s = case break (=='\n') s of
- ([], _:ys) -> NewLine <| toChunks ys
- (xs, _:ys) -> Text (realLength xs) xs <|
- (NewLine <| toChunks ys)
- (xs, []) -> singleton $ Text (realLength xs) xs
-
--- | A character.
-char :: Char -> Doc
-char c = text [c]
-
--- | A breaking (reflowable) space.
-space :: Doc
-space = Doc $ singleton BreakingSpace
-
--- | A carriage return. Does nothing if we're at the beginning of
--- a line; otherwise inserts a newline.
-cr :: Doc
-cr = Doc $ singleton CarriageReturn
-
--- | Inserts a blank line unless one exists already.
--- (@blankline <> blankline@ has the same effect as @blankline@.
-blankline :: Doc
-blankline = Doc $ singleton (BlankLines 1)
-
--- | Inserts a blank lines unless they exists already.
--- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@.
-blanklines :: Int -> Doc
-blanklines n = Doc $ singleton (BlankLines n)
-
--- | Uses the specified string as a prefix for every line of
--- the inside document (except the first, if not at the beginning
--- of the line).
-prefixed :: String -> Doc -> Doc
-prefixed pref doc = Doc $ singleton $ Prefixed pref doc
-
--- | Makes a 'Doc' flush against the left margin.
-flush :: Doc -> Doc
-flush doc = Doc $ singleton $ Flush doc
-
--- | Indents a 'Doc' by the specified number of spaces.
-nest :: Int -> Doc -> Doc
-nest ind = prefixed (replicate ind ' ')
-
--- | A hanging indent. @hang ind start doc@ prints @start@,
--- then @doc@, leaving an indent of @ind@ spaces on every
--- line but the first.
-hang :: Int -> Doc -> Doc -> Doc
-hang ind start doc = start <> nest ind doc
-
--- | @beforeNonBlank d@ conditionally includes @d@ unless it is
--- followed by blank space.
-beforeNonBlank :: Doc -> Doc
-beforeNonBlank d = Doc $ singleton (BeforeNonBlank d)
-
--- | Makes a 'Doc' non-reflowable.
-nowrap :: Doc -> Doc
-nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc
- where replaceSpace _ BreakingSpace = Text 1 " "
- replaceSpace _ x = x
-
--- | Content to print only if it comes at the beginning of a line,
--- to be used e.g. for escaping line-initial `.` in groff man.
-afterBreak :: String -> Doc
-afterBreak s = Doc $ singleton (AfterBreak s)
-
--- | Returns the width of a 'Doc'.
-offset :: Doc -> Int
-offset d = case map realLength . lines . render Nothing $ d of
- [] -> 0
- os -> maximum os
-
--- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces.
-minOffset :: Doc -> Int
-minOffset d = maximum (0: map realLength (lines $ render (Just 0) d))
-
--- | @lblock n d@ is a block of width @n@ characters, with
--- text derived from @d@ and aligned to the left.
-lblock :: Int -> Doc -> Doc
-lblock = block id
-
--- | Like 'lblock' but aligned to the right.
-rblock :: Int -> Doc -> Doc
-rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w
-
--- | Like 'lblock' but aligned centered.
-cblock :: Int -> Doc -> Doc
-cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w
-
--- | Returns the height of a block or other 'Doc'.
-height :: Doc -> Int
-height = length . lines . render Nothing
-
-block :: (String -> String) -> Int -> Doc -> Doc
-block filler width d
- | width < 1 && not (isEmpty d) = error "Text.Pandoc.Pretty.block: width < 1"
- | otherwise = Doc $ singleton $ Block width $ map filler
- $ chop width $ render (Just width) d
-
-chop :: Int -> String -> [String]
-chop _ [] = []
-chop n cs = case break (=='\n') cs of
- (xs, ys) -> if len <= n
- then case ys of
- [] -> [xs]
- ['\n'] -> [xs]
- (_:zs) -> xs : chop n zs
- else take n xs : chop n (drop n xs ++ ys)
- where len = realLength xs
-
--- | Encloses a 'Doc' inside a start and end 'Doc'.
-inside :: Doc -> Doc -> Doc -> Doc
-inside start end contents =
- start <> contents <> end
-
--- | Puts a 'Doc' in curly braces.
-braces :: Doc -> Doc
-braces = inside (char '{') (char '}')
-
--- | Puts a 'Doc' in square brackets.
-brackets :: Doc -> Doc
-brackets = inside (char '[') (char ']')
-
--- | Puts a 'Doc' in parentheses.
-parens :: Doc -> Doc
-parens = inside (char '(') (char ')')
-
--- | Wraps a 'Doc' in single quotes.
-quotes :: Doc -> Doc
-quotes = inside (char '\'') (char '\'')
-
--- | Wraps a 'Doc' in double quotes.
-doubleQuotes :: Doc -> Doc
-doubleQuotes = inside (char '"') (char '"')
-
--- | Returns width of a character in a monospace font: 0 for a combining
--- character, 1 for a regular character, 2 for an East Asian wide character.
-charWidth :: Char -> Int
-charWidth c =
- case c of
- _ | c < '\x0300' -> 1
- | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
- | c >= '\x0370' && c <= '\x10FC' -> 1
- | c >= '\x1100' && c <= '\x115F' -> 2
- | c >= '\x1160' && c <= '\x11A2' -> 1
- | c >= '\x11A3' && c <= '\x11A7' -> 2
- | c >= '\x11A8' && c <= '\x11F9' -> 1
- | c >= '\x11FA' && c <= '\x11FF' -> 2
- | c >= '\x1200' && c <= '\x2328' -> 1
- | c >= '\x2329' && c <= '\x232A' -> 2
- | c >= '\x232B' && c <= '\x2E31' -> 1
- | c >= '\x2E80' && c <= '\x303E' -> 2
- | c == '\x303F' -> 1
- | c >= '\x3041' && c <= '\x3247' -> 2
- | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
- | c >= '\x3250' && c <= '\x4DBF' -> 2
- | c >= '\x4DC0' && c <= '\x4DFF' -> 1
- | c >= '\x4E00' && c <= '\xA4C6' -> 2
- | c >= '\xA4D0' && c <= '\xA95F' -> 1
- | c >= '\xA960' && c <= '\xA97C' -> 2
- | c >= '\xA980' && c <= '\xABF9' -> 1
- | c >= '\xAC00' && c <= '\xD7FB' -> 2
- | c >= '\xD800' && c <= '\xDFFF' -> 1
- | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
- | c >= '\xF900' && c <= '\xFAFF' -> 2
- | c >= '\xFB00' && c <= '\xFDFD' -> 1
- | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
- | c >= '\xFE10' && c <= '\xFE19' -> 2
- | c >= '\xFE20' && c <= '\xFE26' -> 1
- | c >= '\xFE30' && c <= '\xFE6B' -> 2
- | c >= '\xFE70' && c <= '\xFEFF' -> 1
- | c >= '\xFF01' && c <= '\xFF60' -> 2
- | c >= '\xFF61' && c <= '\x16A38' -> 1
- | c >= '\x1B000' && c <= '\x1B001' -> 2
- | c >= '\x1D000' && c <= '\x1F1FF' -> 1
- | c >= '\x1F200' && c <= '\x1F251' -> 2
- | c >= '\x1F300' && c <= '\x1F773' -> 1
- | c >= '\x20000' && c <= '\x3FFFD' -> 2
- | otherwise -> 1
-
--- | Get real length of string, taking into account combining and double-wide
--- characters.
-realLength :: String -> Int
-realLength = foldr (\a b -> charWidth a + b) 0
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
deleted file mode 100644
index 294a38a1b..000000000
--- a/src/Text/Pandoc/Process.hs
+++ /dev/null
@@ -1,98 +0,0 @@
-{-
-Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Process
- Copyright : Copyright (C) 2013-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-ByteString variant of 'readProcessWithExitCode'.
--}
-module Text.Pandoc.Process (pipeProcess)
-where
-import System.Process
-import System.Exit (ExitCode (..))
-import Control.Exception
-import System.IO (hClose, hFlush)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
-import Control.Monad (unless)
-import qualified Data.ByteString.Lazy as BL
-
-{- |
-Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings
-instead of strings and allows setting environment variables.
-
-@readProcessWithExitCode@ creates an external process, reads its
-standard output strictly, waits until the process
-terminates, and then returns the 'ExitCode' of the process
-and the standard output. stderr is inherited from the parent.
-
-If an asynchronous exception is thrown to the thread executing
-@readProcessWithExitCode@, the forked process will be terminated and
-@readProcessWithExitCode@ will wait (block) until the process has been
-terminated.
--}
-
-pipeProcess
- :: Maybe [(String, String)] -- ^ environment variables
- -> FilePath -- ^ Filename of the executable (see 'proc' for details)
- -> [String] -- ^ any arguments
- -> BL.ByteString -- ^ standard input
- -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout
-pipeProcess mbenv cmd args input =
- mask $ \restore -> do
- (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args)
- { env = mbenv,
- std_in = CreatePipe,
- std_out = CreatePipe,
- std_err = Inherit }
- flip onException
- (do hClose inh; hClose outh;
- terminateProcess pid; waitForProcess pid) $ restore $ do
- -- fork off a thread to start consuming stdout
- out <- BL.hGetContents outh
- waitOut <- forkWait $ evaluate $ BL.length out
-
- -- now write and flush any input
- let writeInput = do
- unless (BL.null input) $ do
- BL.hPutStr inh input
- hFlush inh
- hClose inh
-
- writeInput
-
- -- wait on the output
- waitOut
-
- hClose outh
-
- -- wait on the process
- ex <- waitForProcess pid
-
- return (ex, out)
-
-forkWait :: IO a -> IO (IO a)
-forkWait a = do
- res <- newEmptyMVar
- _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res
- return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return)
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
deleted file mode 100644
index b0bcbd580..000000000
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ /dev/null
@@ -1,128 +0,0 @@
-{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.CommonMark
- Copyright : Copyright (C) 2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of CommonMark-formatted plain text to 'Pandoc' document.
-
-CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
--}
-module Text.Pandoc.Readers.CommonMark (readCommonMark)
-where
-
-import CMark
-import Data.Text (unpack, pack)
-import Data.List (groupBy)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Class (PandocMonad)
-
--- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc
-readCommonMark opts s = return $
- nodeToPandoc $ commonmarkToNode opts' $ pack s
- where opts' = if extensionEnabled Ext_smart (readerExtensions opts)
- then [optNormalize, optSmart]
- else [optNormalize]
-
-nodeToPandoc :: Node -> Pandoc
-nodeToPandoc (Node _ DOCUMENT nodes) =
- Pandoc nullMeta $ foldr addBlock [] nodes
-nodeToPandoc n = -- shouldn't happen
- Pandoc nullMeta $ foldr addBlock [] [n]
-
-addBlocks :: [Node] -> [Block]
-addBlocks = foldr addBlock []
-
-addBlock :: Node -> [Block] -> [Block]
-addBlock (Node _ PARAGRAPH nodes) =
- (Para (addInlines nodes) :)
-addBlock (Node _ THEMATIC_BREAK _) =
- (HorizontalRule :)
-addBlock (Node _ BLOCK_QUOTE nodes) =
- (BlockQuote (addBlocks nodes) :)
-addBlock (Node _ (HTML_BLOCK t) _) =
- (RawBlock (Format "html") (unpack t) :)
--- Note: the cmark parser will never generate CUSTOM_BLOCK,
--- so we don't need to handle it:
-addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
- id
-addBlock (Node _ (CODE_BLOCK info t) _) =
- (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
-addBlock (Node _ (HEADING lev) nodes) =
- (Header lev ("",[],[]) (addInlines nodes) :)
-addBlock (Node _ (LIST listAttrs) nodes) =
- (constructor (map (setTightness . addBlocks . children) nodes) :)
- where constructor = case listType listAttrs of
- BULLET_LIST -> BulletList
- ORDERED_LIST -> OrderedList
- (start, DefaultStyle, delim)
- start = listStart listAttrs
- setTightness = if listTight listAttrs
- then map paraToPlain
- else id
- paraToPlain (Para xs) = Plain (xs)
- paraToPlain x = x
- delim = case listDelim listAttrs of
- PERIOD_DELIM -> Period
- PAREN_DELIM -> OneParen
-addBlock (Node _ ITEM _) = id -- handled in LIST
-addBlock _ = id
-
-children :: Node -> [Node]
-children (Node _ _ ns) = ns
-
-addInlines :: [Node] -> [Inline]
-addInlines = foldr addInline []
-
-addInline :: Node -> [Inline] -> [Inline]
-addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
- where raw = unpack t
- clumps = groupBy samekind raw
- samekind ' ' ' ' = True
- samekind ' ' _ = False
- samekind _ ' ' = False
- samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str xs
-addInline (Node _ LINEBREAK _) = (LineBreak :)
-addInline (Node _ SOFTBREAK _) = (SoftBreak :)
-addInline (Node _ (HTML_INLINE t) _) =
- (RawInline (Format "html") (unpack t) :)
--- Note: the cmark parser will never generate CUSTOM_BLOCK,
--- so we don't need to handle it:
-addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
- id
-addInline (Node _ (CODE t) _) =
- (Code ("",[],[]) (unpack t) :)
-addInline (Node _ EMPH nodes) =
- (Emph (addInlines nodes) :)
-addInline (Node _ STRONG nodes) =
- (Strong (addInlines nodes) :)
-addInline (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
-addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
deleted file mode 100644
index bef256a93..000000000
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ /dev/null
@@ -1,1055 +0,0 @@
-module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper)
-import Text.Pandoc.Shared (safeRead)
-import Text.Pandoc.Options
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
-import Data.Either (rights)
-import Data.Generics
-import Data.Char (isSpace)
-import Control.Monad.State
-import Data.List (intersperse)
-import Data.Maybe (fromMaybe)
-import Text.TeXMath (readMathML, writeTeX)
-import Data.Default
-import Data.Foldable (asum)
-import Text.Pandoc.Class (PandocMonad)
-
-{-
-
-List of all DocBook tags, with [x] indicating implemented,
-[o] meaning intentionally left unimplemented (pass through):
-
-[o] abbrev - An abbreviation, especially one followed by a period
-[x] abstract - A summary
-[o] accel - A graphical user interface (GUI) keyboard shortcut
-[x] ackno - Acknowledgements in an Article
-[o] acronym - An often pronounceable word made from the initial
-[o] action - A response to a user event
-[o] address - A real-world address, generally a postal address
-[ ] affiliation - The institutional affiliation of an individual
-[ ] alt - Text representation for a graphical element
-[o] anchor - A spot in the document
-[x] answer - An answer to a question posed in a QandASet
-[x] appendix - An appendix in a Book or Article
-[x] appendixinfo - Meta-information for an Appendix
-[o] application - The name of a software program
-[x] area - A region defined for a Callout in a graphic or code example
-[x] areaset - A set of related areas in a graphic or code example
-[x] areaspec - A collection of regions in a graphic or code example
-[ ] arg - An argument in a CmdSynopsis
-[x] article - An article
-[x] articleinfo - Meta-information for an Article
-[ ] artpagenums - The page numbers of an article as published
-[x] attribution - The source of a block quote or epigraph
-[ ] audiodata - Pointer to external audio data
-[ ] audioobject - A wrapper for audio data and its associated meta-information
-[x] author - The name of an individual author
-[ ] authorblurb - A short description or note about an author
-[x] authorgroup - Wrapper for author information when a document has
- multiple authors or collabarators
-[x] authorinitials - The initials or other short identifier for an author
-[o] beginpage - The location of a page break in a print version of the document
-[ ] bibliocoverage - The spatial or temporal coverage of a document
-[x] bibliodiv - A section of a Bibliography
-[x] biblioentry - An entry in a Bibliography
-[x] bibliography - A bibliography
-[ ] bibliographyinfo - Meta-information for a Bibliography
-[ ] biblioid - An identifier for a document
-[o] bibliolist - A wrapper for a set of bibliography entries
-[ ] bibliomisc - Untyped bibliographic information
-[x] bibliomixed - An entry in a Bibliography
-[ ] bibliomset - A cooked container for related bibliographic information
-[ ] biblioref - A cross reference to a bibliographic entry
-[ ] bibliorelation - The relationship of a document to another
-[ ] biblioset - A raw container for related bibliographic information
-[ ] bibliosource - The source of a document
-[ ] blockinfo - Meta-information for a block element
-[x] blockquote - A quotation set off from the main text
-[x] book - A book
-[x] bookinfo - Meta-information for a Book
-[x] bridgehead - A free-floating heading
-[x] callout - A “called out” description of a marked Area
-[x] calloutlist - A list of Callouts
-[x] caption - A caption
-[x] caution - A note of caution
-[x] chapter - A chapter, as of a book
-[x] chapterinfo - Meta-information for a Chapter
-[ ] citation - An inline bibliographic reference to another published work
-[ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
-[ ] citetitle - The title of a cited work
-[ ] city - The name of a city in an address
-[x] classname - The name of a class, in the object-oriented programming sense
-[ ] classsynopsis - The syntax summary for a class definition
-[ ] classsynopsisinfo - Information supplementing the contents of
- a ClassSynopsis
-[ ] cmdsynopsis - A syntax summary for a software command
-[ ] co - The location of a callout embedded in text
-[x] code - An inline code fragment
-[x] col - Specifications for a column in an HTML table
-[x] colgroup - A group of columns in an HTML table
-[ ] collab - Identifies a collaborator
-[ ] collabname - The name of a collaborator
-[ ] colophon - Text at the back of a book describing facts about its production
-[x] colspec - Specifications for a column in a table
-[x] command - The name of an executable program or other software command
-[x] computeroutput - Data, generally text, displayed or presented by a computer
-[ ] confdates - The dates of a conference for which a document was written
-[ ] confgroup - A wrapper for document meta-information about a conference
-[ ] confnum - An identifier, frequently numerical, associated with a conference for which a document was written
-[ ] confsponsor - The sponsor of a conference for which a document was written
-[ ] conftitle - The title of a conference for which a document was written
-[x] constant - A programming or system constant
-[ ] constraint - A constraint in an EBNF production
-[ ] constraintdef - The definition of a constraint in an EBNF production
-[ ] constructorsynopsis - A syntax summary for a constructor
-[ ] contractnum - The contract number of a document
-[ ] contractsponsor - The sponsor of a contract
-[ ] contrib - A summary of the contributions made to a document by a
- credited source
-[ ] copyright - Copyright information about a document
-[ ] coref - A cross reference to a co
-[ ] corpauthor - A corporate author, as opposed to an individual
-[ ] corpcredit - A corporation or organization credited in a document
-[ ] corpname - The name of a corporation
-[ ] country - The name of a country
-[ ] database - The name of a database, or part of a database
-[x] date - The date of publication or revision of a document
-[ ] dedication - A wrapper for the dedication section of a book
-[ ] destructorsynopsis - A syntax summary for a destructor
-[ ] edition - The name or number of an edition of a document
-[ ] editor - The name of the editor of a document
-[x] email - An email address
-[x] emphasis - Emphasized text
-[x] entry - A cell in a table
-[ ] entrytbl - A subtable appearing in place of an Entry in a table
-[ ] envar - A software environment variable
-[x] epigraph - A short inscription at the beginning of a document or component
- note: also handle embedded attribution tag
-[x] equation - A displayed mathematical equation
-[ ] errorcode - An error code
-[ ] errorname - An error name
-[ ] errortext - An error message.
-[ ] errortype - The classification of an error message
-[ ] example - A formal example, with a title
-[ ] exceptionname - The name of an exception
-[ ] fax - A fax number
-[ ] fieldsynopsis - The name of a field in a class definition
-[x] figure - A formal figure, generally an illustration, with a title
-[x] filename - The name of a file
-[ ] firstname - The first name of a person
-[ ] firstterm - The first occurrence of a term
-[x] footnote - A footnote
-[ ] footnoteref - A cross reference to a footnote (a footnote mark)
-[x] foreignphrase - A word or phrase in a language other than the primary
- language of the document
-[x] formalpara - A paragraph with a title
-[ ] funcdef - A function (subroutine) name and its return type
-[ ] funcparams - Parameters for a function referenced through a function
- pointer in a synopsis
-[ ] funcprototype - The prototype of a function
-[ ] funcsynopsis - The syntax summary for a function definition
-[ ] funcsynopsisinfo - Information supplementing the FuncDefs of a FuncSynopsis
-[x] function - The name of a function or subroutine, as in a
- programming language
-[x] glossary - A glossary
-[x] glossaryinfo - Meta-information for a Glossary
-[x] glossdef - A definition in a GlossEntry
-[x] glossdiv - A division in a Glossary
-[x] glossentry - An entry in a Glossary or GlossList
-[x] glosslist - A wrapper for a set of GlossEntrys
-[x] glosssee - A cross-reference from one GlossEntry to another
-[x] glossseealso - A cross-reference from one GlossEntry to another
-[x] glossterm - A glossary term
-[ ] graphic - A displayed graphical object (not an inline)
- Note: in DocBook v5 `graphic` is discarded
-[ ] graphicco - A graphic that contains callout areas
- Note: in DocBook v5 `graphicco` is discarded
-[ ] group - A group of elements in a CmdSynopsis
-[ ] guibutton - The text on a button in a GUI
-[ ] guiicon - Graphic and/or text appearing as a icon in a GUI
-[ ] guilabel - The text of a label in a GUI
-[x] guimenu - The name of a menu in a GUI
-[x] guimenuitem - The name of a terminal menu item in a GUI
-[x] guisubmenu - The name of a submenu in a GUI
-[ ] hardware - A physical part of a computer system
-[ ] highlights - A summary of the main points of the discussed component
-[ ] holder - The name of the individual or organization that holds a copyright
-[o] honorific - The title of a person
-[ ] html:form - An HTML form
-[x] imagedata - Pointer to external image data (only `fileref` attribute
- implemented but not `entityref` which would require parsing of the DTD)
-[x] imageobject - A wrapper for image data and its associated meta-information
-[ ] imageobjectco - A wrapper for an image object with callouts
-[x] important - An admonition set off from the text
-[x] index - An index
-[x] indexdiv - A division in an index
-[x] indexentry - An entry in an index
-[x] indexinfo - Meta-information for an Index
-[x] indexterm - A wrapper for terms to be indexed
-[x] info - A wrapper for information about a component or other block. (DocBook v5)
-[x] informalequation - A displayed mathematical equation without a title
-[x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
-[ ] informaltable - A table without a title
-[ ] initializer - The initializer for a FieldSynopsis
-[x] inlineequation - A mathematical equation or expression occurring inline
-[ ] inlinegraphic - An object containing or pointing to graphical data
- that will be rendered inline
-[x] inlinemediaobject - An inline media object (video, audio, image, and so on)
-[ ] interface - An element of a GUI
-[ ] interfacename - The name of an interface
-[ ] invpartnumber - An inventory part number
-[ ] isbn - The International Standard Book Number of a document
-[ ] issn - The International Standard Serial Number of a periodical
-[ ] issuenum - The number of an issue of a journal
-[x] itemizedlist - A list in which each entry is marked with a bullet or
- other dingbat
-[ ] itermset - A set of index terms in the meta-information of a document
-[ ] jobtitle - The title of an individual in an organization
-[x] keycap - The text printed on a key on a keyboard
-[ ] keycode - The internal, frequently numeric, identifier for a key
- on a keyboard
-[x] keycombo - A combination of input actions
-[ ] keysym - The symbolic name of a key on a keyboard
-[ ] keyword - One of a set of keywords describing the content of a document
-[ ] keywordset - A set of keywords describing the content of a document
-[ ] label - A label on a Question or Answer
-[ ] legalnotice - A statement of legal obligations or requirements
-[ ] lhs - The left-hand side of an EBNF production
-[ ] lineage - The portion of a person's name indicating a relationship to
- ancestors
-[ ] lineannotation - A comment on a line in a verbatim listing
-[x] link - A hypertext link
-[x] listitem - A wrapper for the elements of a list item
-[x] literal - Inline text that is some literal value
-[x] literallayout - A block of text in which line breaks and white space are
- to be reproduced faithfully
-[ ] lot - A list of the titles of formal objects (as tables or figures) in
- a document
-[ ] lotentry - An entry in a list of titles
-[ ] manvolnum - A reference volume number
-[x] markup - A string of formatting markup in text that is to be
- represented literally
-[ ] mathphrase - A mathematical phrase, an expression that can be represented
- with ordinary text and a small amount of markup
-[ ] medialabel - A name that identifies the physical medium on which some
- information resides
-[x] mediaobject - A displayed media object (video, audio, image, etc.)
-[ ] mediaobjectco - A media object that contains callouts
-[x] member - An element of a simple list
-[x] menuchoice - A selection or series of selections from a menu
-[ ] methodname - The name of a method
-[ ] methodparam - Parameters to a method
-[ ] methodsynopsis - A syntax summary for a method
-[x] mml:math - A MathML equation
-[ ] modespec - Application-specific information necessary for the
- completion of an OLink
-[ ] modifier - Modifiers in a synopsis
-[ ] mousebutton - The conventional name of a mouse button
-[ ] msg - A message in a message set
-[ ] msgaud - The audience to which a message in a message set is relevant
-[ ] msgentry - A wrapper for an entry in a message set
-[ ] msgexplan - Explanatory material relating to a message in a message set
-[ ] msginfo - Information about a message in a message set
-[ ] msglevel - The level of importance or severity of a message in a message set
-[ ] msgmain - The primary component of a message in a message set
-[ ] msgorig - The origin of a message in a message set
-[ ] msgrel - A related component of a message in a message set
-[ ] msgset - A detailed set of messages, usually error messages
-[ ] msgsub - A subcomponent of a message in a message set
-[ ] msgtext - The actual text of a message component in a message set
-[ ] nonterminal - A non-terminal in an EBNF production
-[x] note - A message set off from the text
-[ ] objectinfo - Meta-information for an object
-[ ] olink - A link that addresses its target indirectly, through an entity
-[ ] ooclass - A class in an object-oriented programming language
-[ ] ooexception - An exception in an object-oriented programming language
-[ ] oointerface - An interface in an object-oriented programming language
-[x] option - An option for a software command
-[x] optional - Optional information
-[x] orderedlist - A list in which each entry is marked with a sequentially
- incremented label
-[ ] orgdiv - A division of an organization
-[ ] orgname - The name of an organization other than a corporation
-[ ] otheraddr - Uncategorized information in address
-[ ] othercredit - A person or entity, other than an author or editor,
- credited in a document
-[ ] othername - A component of a persons name that is not a first name,
- surname, or lineage
-[ ] package - A package
-[ ] pagenums - The numbers of the pages in a book, for use in a bibliographic
- entry
-[x] para - A paragraph
-[ ] paramdef - Information about a function parameter in a programming language
-[x] parameter - A value or a symbolic reference to a value
-[ ] part - A division in a book
-[ ] partinfo - Meta-information for a Part
-[ ] partintro - An introduction to the contents of a part
-[ ] personblurb - A short description or note about a person
-[ ] personname - The personal name of an individual
-[ ] phone - A telephone number
-[ ] phrase - A span of text
-[ ] pob - A post office box in an address
-[ ] postcode - A postal code in an address
-[x] preface - Introductory matter preceding the first chapter of a book
-[ ] prefaceinfo - Meta-information for a Preface
-[ ] primary - The primary word or phrase under which an index term should be
- sorted
-[ ] primaryie - A primary term in an index entry, not in the text
-[ ] printhistory - The printing history of a document
-[ ] procedure - A list of operations to be performed in a well-defined sequence
-[ ] production - A production in a set of EBNF productions
-[ ] productionrecap - A cross-reference to an EBNF production
-[ ] productionset - A set of EBNF productions
-[ ] productname - The formal name of a product
-[ ] productnumber - A number assigned to a product
-[x] programlisting - A literal listing of all or part of a program
-[ ] programlistingco - A program listing with associated areas used in callouts
-[x] prompt - A character or string indicating the start of an input field in
- a computer display
-[ ] property - A unit of data associated with some part of a computer system
-[ ] pubdate - The date of publication of a document
-[ ] publisher - The publisher of a document
-[ ] publishername - The name of the publisher of a document
-[ ] pubsnumber - A number assigned to a publication other than an ISBN or ISSN
- or inventory part number
-[x] qandadiv - A titled division in a QandASet
-[o] qandaentry - A question/answer set within a QandASet
-[o] qandaset - A question-and-answer set
-[x] question - A question in a QandASet
-[x] quote - An inline quotation
-[ ] refclass - The scope or other indication of applicability of a
- reference entry
-[ ] refdescriptor - A description of the topic of a reference page
-[ ] refentry - A reference page (originally a UNIX man-style reference page)
-[ ] refentryinfo - Meta-information for a Refentry
-[ ] refentrytitle - The title of a reference page
-[ ] reference - A collection of reference entries
-[ ] referenceinfo - Meta-information for a Reference
-[ ] refmeta - Meta-information for a reference entry
-[ ] refmiscinfo - Meta-information for a reference entry other than the title
- and volume number
-[ ] refname - The name of (one of) the subject(s) of a reference page
-[ ] refnamediv - The name, purpose, and classification of a reference page
-[ ] refpurpose - A short (one sentence) synopsis of the topic of a reference
- page
-[x] refsect1 - A major subsection of a reference entry
-[x] refsect1info - Meta-information for a RefSect1
-[x] refsect2 - A subsection of a RefSect1
-[x] refsect2info - Meta-information for a RefSect2
-[x] refsect3 - A subsection of a RefSect2
-[x] refsect3info - Meta-information for a RefSect3
-[x] refsection - A recursive section in a refentry
-[x] refsectioninfo - Meta-information for a refsection
-[ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page
-[ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv
-[x] releaseinfo - Information about a particular release of a document
-[ ] remark - A remark (or comment) intended for presentation in a draft
- manuscript
-[ ] replaceable - Content that may or must be replaced by the user
-[ ] returnvalue - The value returned by a function
-[ ] revdescription - A extended description of a revision to a document
-[ ] revhistory - A history of the revisions to a document
-[ ] revision - An entry describing a single revision in the history of the
- revisions to a document
-[ ] revnumber - A document revision number
-[ ] revremark - A description of a revision to a document
-[ ] rhs - The right-hand side of an EBNF production
-[x] row - A row in a table
-[ ] sbr - An explicit line break in a command synopsis
-[x] screen - Text that a user sees or might see on a computer screen
-[o] screenco - A screen with associated areas used in callouts
-[o] screeninfo - Information about how a screen shot was produced
-[ ] screenshot - A representation of what the user sees or might see on a
- computer screen
-[ ] secondary - A secondary word or phrase in an index term
-[ ] secondaryie - A secondary term in an index entry, rather than in the text
-[x] sect1 - A top-level section of document
-[x] sect1info - Meta-information for a Sect1
-[x] sect2 - A subsection within a Sect1
-[x] sect2info - Meta-information for a Sect2
-[x] sect3 - A subsection within a Sect2
-[x] sect3info - Meta-information for a Sect3
-[x] sect4 - A subsection within a Sect3
-[x] sect4info - Meta-information for a Sect4
-[x] sect5 - A subsection within a Sect4
-[x] sect5info - Meta-information for a Sect5
-[x] section - A recursive section
-[x] sectioninfo - Meta-information for a recursive section
-[x] see - Part of an index term directing the reader instead to another entry
- in the index
-[x] seealso - Part of an index term directing the reader also to another entry
- in the index
-[ ] seealsoie - A See also entry in an index, rather than in the text
-[ ] seeie - A See entry in an index, rather than in the text
-[x] seg - An element of a list item in a segmented list
-[x] seglistitem - A list item in a segmented list
-[x] segmentedlist - A segmented list, a list of sets of elements
-[x] segtitle - The title of an element of a list item in a segmented list
-[ ] seriesvolnums - Numbers of the volumes in a series of books
-[ ] set - A collection of books
-[ ] setindex - An index to a set of books
-[ ] setindexinfo - Meta-information for a SetIndex
-[ ] setinfo - Meta-information for a Set
-[ ] sgmltag - A component of SGML markup
-[ ] shortaffil - A brief description of an affiliation
-[ ] shortcut - A key combination for an action that is also accessible through
- a menu
-[ ] sidebar - A portion of a document that is isolated from the main
- narrative flow
-[ ] sidebarinfo - Meta-information for a Sidebar
-[x] simpara - A paragraph that contains only text and inline markup, no block
- elements
-[x] simplelist - An undecorated list of single words or short phrases
-[ ] simplemsgentry - A wrapper for a simpler entry in a message set
-[ ] simplesect - A section of a document with no subdivisions
-[ ] spanspec - Formatting information for a spanned column in a table
-[ ] state - A state or province in an address
-[ ] step - A unit of action in a procedure
-[ ] stepalternatives - Alternative steps in a procedure
-[ ] street - A street address in an address
-[ ] structfield - A field in a structure (in the programming language sense)
-[ ] structname - The name of a structure (in the programming language sense)
-[ ] subject - One of a group of terms describing the subject matter of a
- document
-[ ] subjectset - A set of terms describing the subject matter of a document
-[ ] subjectterm - A term in a group of terms describing the subject matter of
- a document
-[x] subscript - A subscript (as in H2O, the molecular formula for water)
-[ ] substeps - A wrapper for steps that occur within steps in a procedure
-[x] subtitle - The subtitle of a document
-[x] superscript - A superscript (as in x2, the mathematical notation for x
- multiplied by itself)
-[ ] surname - A family name; in western cultures the last name
-[ ] svg:svg - An SVG graphic
-[x] symbol - A name that is replaced by a value before processing
-[ ] synopfragment - A portion of a CmdSynopsis broken out from the main body
- of the synopsis
-[ ] synopfragmentref - A reference to a fragment of a command synopsis
-[ ] synopsis - A general-purpose element for representing the syntax of
- commands or functions
-[ ] systemitem - A system-related item or term
-[ ] table - A formal table in a document
-[ ] task - A task to be completed
-[ ] taskprerequisites - The prerequisites for a task
-[ ] taskrelated - Information related to a task
-[ ] tasksummary - A summary of a task
-[x] tbody - A wrapper for the rows of a table or informal table
-[x] td - A table entry in an HTML table
-[x] term - The word or phrase being defined or described in a variable list
-[ ] termdef - An inline term definition
-[ ] tertiary - A tertiary word or phrase in an index term
-[ ] tertiaryie - A tertiary term in an index entry, rather than in the text
-[ ] textdata - Pointer to external text data
-[ ] textobject - A wrapper for a text description of an object and its
- associated meta-information
-[ ] tfoot - A table footer consisting of one or more rows
-[x] tgroup - A wrapper for the main content of a table, or part of a table
-[x] th - A table header entry in an HTML table
-[x] thead - A table header consisting of one or more rows
-[x] tip - A suggestion to the user, set off from the text
-[x] title - The text of the title of a section of a document or of a formal
- block-level element
-[x] titleabbrev - The abbreviation of a Title
-[x] toc - A table of contents
-[x] tocback - An entry in a table of contents for a back matter component
-[x] tocchap - An entry in a table of contents for a component in the body of
- a document
-[x] tocentry - A component title in a table of contents
-[x] tocfront - An entry in a table of contents for a front matter component
-[x] toclevel1 - A top-level entry within a table of contents entry for a
- chapter-like component
-[x] toclevel2 - A second-level entry within a table of contents entry for a
- chapter-like component
-[x] toclevel3 - A third-level entry within a table of contents entry for a
- chapter-like component
-[x] toclevel4 - A fourth-level entry within a table of contents entry for a
- chapter-like component
-[x] toclevel5 - A fifth-level entry within a table of contents entry for a
- chapter-like component
-[x] tocpart - An entry in a table of contents for a part of a book
-[ ] token - A unit of information
-[x] tr - A row in an HTML table
-[ ] trademark - A trademark
-[x] type - The classification of a value
-[x] ulink - A link that addresses its target by means of a URL
- (Uniform Resource Locator)
-[x] uri - A Uniform Resource Identifier
-[x] userinput - Data entered by the user
-[x] varargs - An empty element in a function synopsis indicating a variable
- number of arguments
-[x] variablelist - A list in which each entry is composed of a set of one or
- more terms and an associated description
-[x] varlistentry - A wrapper for a set of terms and the associated description
- in a variable list
-[x] varname - The name of a variable
-[ ] videodata - Pointer to external video data
-[ ] videoobject - A wrapper for video data and its associated meta-information
-[ ] void - An empty element in a function synopsis indicating that the
- function in question takes no arguments
-[ ] volumenum - The volume number of a document in a set (as of books in a set
- or articles in a journal)
-[x] warning - An admonition set off from the text
-[x] wordasword - A word meant specifically as a word and not representing
- anything else
-[x] xref - A cross reference to another part of the document
-[ ] year - The year of publication of a document
-[x] ?asciidoc-br? - line break from asciidoc docbook output
--}
-
-type DB m = StateT DBState m
-
-data DBState = DBState{ dbSectionLevel :: Int
- , dbQuoteType :: QuoteType
- , dbMeta :: Meta
- , dbAcceptsMeta :: Bool
- , dbBook :: Bool
- , dbFigureTitle :: Inlines
- , dbContent :: [Content]
- } deriving Show
-
-instance Default DBState where
- def = DBState{ dbSectionLevel = 0
- , dbQuoteType = DoubleQuote
- , dbMeta = mempty
- , dbAcceptsMeta = False
- , dbBook = False
- , dbFigureTitle = mempty
- , dbContent = [] }
-
-
-readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc
-readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions $ inp
- (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
- return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
-handleInstructions :: String -> String
-handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
-handleInstructions xs = case break (=='<') xs of
- (ys, []) -> ys
- ([], '<':zs) -> '<' : handleInstructions zs
- (ys, zs) -> ys ++ handleInstructions zs
-
-getFigure :: PandocMonad m => Element -> DB m Blocks
-getFigure e = do
- tit <- case filterChild (named "title") e of
- Just t -> getInlines t
- Nothing -> return mempty
- modify $ \st -> st{ dbFigureTitle = tit }
- res <- getBlocks e
- modify $ \st -> st{ dbFigureTitle = mempty }
- return res
-
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
- where go :: [Content] -> [Content]
- go (Text (CData CDataRaw _ _):xs) = xs
- go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
- go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
- go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
- go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
- go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = maybe (map toUpper e) id (lookupEntity e)
-
--- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
-attrValue attr elt =
- case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
- Just z -> z
- Nothing -> ""
-
--- convenience function
-named :: String -> Element -> Bool
-named s e = qName (elName e) == s
-
---
-
-acceptingMetadata :: PandocMonad m => DB m a -> DB m a
-acceptingMetadata p = do
- modify (\s -> s { dbAcceptsMeta = True } )
- res <- p
- modify (\s -> s { dbAcceptsMeta = False })
- return res
-
-checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
-checkInMeta p = do
- accepts <- dbAcceptsMeta <$> get
- when accepts p
- return mempty
-
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
-addMeta field val = modify (setMeta field val)
-
-instance HasMeta DBState where
- setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)}
- deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)}
-
-isBlockElement :: Content -> Bool
-isBlockElement (Elem e) = qName (elName e) `elem` blocktags
- where blocktags = ["toc","index","para","formalpara","simpara",
- "ackno","epigraph","blockquote","bibliography","bibliodiv",
- "biblioentry","glossee","glosseealso","glossary",
- "glossdiv","glosslist","chapter","appendix","preface",
- "bridgehead","sect1","sect2","sect3","sect4","sect5","section",
- "refsect1","refsect2","refsect3","refsection",
- "important","caution","note","tip","warning","qandadiv",
- "question","answer","abstract","itemizedlist","orderedlist",
- "variablelist","article","book","table","informaltable",
- "informalexample", "linegroup",
- "screen","programlisting","example","calloutlist"]
-isBlockElement _ = False
-
--- Trim leading and trailing newline characters
-trimNl :: String -> String
-trimNl = reverse . go . reverse . go
- where go ('\n':xs) = xs
- go xs = xs
-
--- meld text into beginning of first paragraph of Blocks.
--- assumes Blocks start with a Para; if not, does nothing.
-addToStart :: Inlines -> Blocks -> Blocks
-addToStart toadd bs =
- case toList bs of
- (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest
- _ -> bs
-
--- function that is used by both mediaobject (in parseBlock)
--- and inlinemediaobject (in parseInline)
--- A DocBook mediaobject is a wrapper around a set of alternative presentations
-getMediaobject :: PandocMonad m => Element -> DB m Inlines
-getMediaobject e = do
- (imageUrl, attr) <-
- case filterChild (named "imageobject") e of
- Nothing -> return (mempty, nullAttr)
- Just z -> case filterChild (named "imagedata") z of
- Nothing -> return (mempty, nullAttr)
- Just i -> let atVal a = attrValue a i
- w = case atVal "width" of
- "" -> []
- d -> [("width", d)]
- h = case atVal "depth" of
- "" -> []
- d -> [("height", d)]
- atr = (atVal "id", words $ atVal "role", w ++ h)
- in return (atVal "fileref", atr)
- let getCaption el = case filterChild (\x -> named "caption" x
- || named "textobject" x
- || named "alt" x) el of
- Nothing -> return mempty
- Just z -> mconcat <$> (mapM parseInline $ elContent z)
- figTitle <- gets dbFigureTitle
- let (caption, title) = if isNull figTitle
- then (getCaption e, "")
- else (return figTitle, "fig:")
- liftM (imageWith attr imageUrl title) caption
-
-getBlocks :: PandocMonad m => Element -> DB m Blocks
-getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
-
-
-parseBlock :: PandocMonad m => Content -> DB m Blocks
-parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
- then return mempty
- else return $ plain $ trimInlines $ text s
-parseBlock (CRef x) = return $ plain $ str $ map toUpper x
-parseBlock (Elem e) =
- case qName (elName e) of
- "toc" -> return mempty -- skip TOC, since in pandoc it's autogenerated
- "index" -> return mempty -- skip index, since page numbers meaningless
- "para" -> parseMixed para (elContent e)
- "formalpara" -> do
- tit <- case filterChild (named "title") e of
- Just t -> (para . strong . (<> str ".")) <$>
- getInlines t
- Nothing -> return mempty
- (tit <>) <$> parseMixed para (elContent e)
- "simpara" -> parseMixed para (elContent e)
- "ackno" -> parseMixed para (elContent e)
- "epigraph" -> parseBlockquote
- "blockquote" -> parseBlockquote
- "attribution" -> return mempty
- "titleabbrev" -> return mempty
- "authorinitials" -> return mempty
- "title" -> checkInMeta getTitle
- "author" -> checkInMeta getAuthor
- "authorgroup" -> checkInMeta getAuthorGroup
- "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
- "date" -> checkInMeta getDate
- "bibliography" -> sect 0
- "bibliodiv" -> sect 1
- "biblioentry" -> parseMixed para (elContent e)
- "bibliomixed" -> parseMixed para (elContent e)
- "glosssee" -> para . (\ils -> text "See " <> ils <> str ".")
- <$> getInlines e
- "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".")
- <$> getInlines e
- "glossary" -> sect 0
- "glossdiv" -> definitionList <$>
- mapM parseGlossEntry (filterChildren (named "glossentry") e)
- "glosslist" -> definitionList <$>
- mapM parseGlossEntry (filterChildren (named "glossentry") e)
- "chapter" -> sect 0
- "appendix" -> sect 0
- "preface" -> sect 0
- "bridgehead" -> para . strong <$> getInlines e
- "sect1" -> sect 1
- "sect2" -> sect 2
- "sect3" -> sect 3
- "sect4" -> sect 4
- "sect5" -> sect 5
- "section" -> gets dbSectionLevel >>= sect . (+1)
- "refsect1" -> sect 1
- "refsect2" -> sect 2
- "refsect3" -> sect 3
- "refsection" -> gets dbSectionLevel >>= sect . (+1)
- "important" -> blockQuote . (para (strong $ str "Important") <>)
- <$> getBlocks e
- "caution" -> blockQuote . (para (strong $ str "Caution") <>)
- <$> getBlocks e
- "note" -> blockQuote . (para (strong $ str "Note") <>)
- <$> getBlocks e
- "tip" -> blockQuote . (para (strong $ str "Tip") <>)
- <$> getBlocks e
- "warning" -> blockQuote . (para (strong $ str "Warning") <>)
- <$> getBlocks e
- "area" -> return mempty
- "areaset" -> return mempty
- "areaspec" -> return mempty
- "qandadiv" -> gets dbSectionLevel >>= sect . (+1)
- "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e
- "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e
- "abstract" -> blockQuote <$> getBlocks e
- "calloutlist" -> bulletList <$> callouts
- "itemizedlist" -> bulletList <$> listitems
- "orderedlist" -> do
- let listStyle = case attrValue "numeration" e of
- "arabic" -> Decimal
- "loweralpha" -> LowerAlpha
- "upperalpha" -> UpperAlpha
- "lowerroman" -> LowerRoman
- "upperroman" -> UpperRoman
- _ -> Decimal
- let start = fromMaybe 1 $
- (attrValue "override" <$> filterElement (named "listitem") e)
- >>= safeRead
- orderedListWith (start,listStyle,DefaultDelim)
- <$> listitems
- "variablelist" -> definitionList <$> deflistitems
- "figure" -> getFigure e
- "mediaobject" -> para <$> getMediaobject e
- "caption" -> return mempty
- "info" -> metaBlock
- "articleinfo" -> metaBlock
- "sectioninfo" -> return mempty -- keywords & other metadata
- "refsectioninfo" -> return mempty -- keywords & other metadata
- "refsect1info" -> return mempty -- keywords & other metadata
- "refsect2info" -> return mempty -- keywords & other metadata
- "refsect3info" -> return mempty -- keywords & other metadata
- "sect1info" -> return mempty -- keywords & other metadata
- "sect2info" -> return mempty -- keywords & other metadata
- "sect3info" -> return mempty -- keywords & other metadata
- "sect4info" -> return mempty -- keywords & other metadata
- "sect5info" -> return mempty -- keywords & other metadata
- "chapterinfo" -> return mempty -- keywords & other metadata
- "glossaryinfo" -> return mempty -- keywords & other metadata
- "appendixinfo" -> return mempty -- keywords & other metadata
- "bookinfo" -> metaBlock
- "article" -> modify (\st -> st{ dbBook = False }) >>
- getBlocks e
- "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
- "table" -> parseTable
- "informaltable" -> parseTable
- "informalexample" -> divWith ("", ["informalexample"], []) <$>
- getBlocks e
- "linegroup" -> lineBlock <$> lineItems
- "literallayout" -> codeBlockWithLang
- "screen" -> codeBlockWithLang
- "programlisting" -> codeBlockWithLang
- "?xml" -> return mempty
- _ -> getBlocks e
- where parseMixed container conts = do
- let (ils,rest) = break isBlockElement conts
- ils' <- (trimInlines . mconcat) <$> mapM parseInline ils
- let p = if ils' == mempty then mempty else container ils'
- case rest of
- [] -> return p
- (r:rs) -> do
- b <- parseBlock r
- x <- parseMixed container rs
- return $ p <> b <> x
- codeBlockWithLang = do
- let classes' = case attrValue "language" e of
- "" -> []
- x -> [x]
- return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContentRecursive e
- parseBlockquote = do
- attrib <- case filterChild (named "attribution") e of
- Nothing -> return mempty
- Just z -> (para . (str "— " <>) . mconcat)
- <$> (mapM parseInline $ elContent z)
- contents <- getBlocks e
- return $ blockQuote (contents <> attrib)
- listitems = mapM getBlocks $ filterChildren (named "listitem") e
- callouts = mapM getBlocks $ filterChildren (named "callout") e
- deflistitems = mapM parseVarListEntry $ filterChildren
- (named "varlistentry") e
- parseVarListEntry e' = do
- let terms = filterChildren (named "term") e'
- let items = filterChildren (named "listitem") e'
- terms' <- mapM getInlines terms
- items' <- mapM getBlocks items
- return (mconcat $ intersperse (str "; ") terms', items')
- parseGlossEntry e' = do
- let terms = filterChildren (named "glossterm") e'
- let items = filterChildren (named "glossdef") e'
- terms' <- mapM getInlines terms
- items' <- mapM getBlocks items
- return (mconcat $ intersperse (str "; ") terms', items')
- getTitle = do
- tit <- getInlines e
- subtit <- case filterChild (named "subtitle") e of
- Just s -> (text ": " <>) <$>
- getInlines s
- Nothing -> return mempty
- addMeta "title" (tit <> subtit)
-
- getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
- getAuthorGroup = do
- let terms = filterChildren (named "author") e
- mapM getInlines terms >>= addMeta "author"
- getDate = getInlines e >>= addMeta "date"
- parseTable = do
- let isCaption x = named "title" x || named "caption" x
- caption <- case filterChild isCaption e of
- Just t -> getInlines t
- Nothing -> return mempty
- let e' = fromMaybe e $ filterChild (named "tgroup") e
- let isColspec x = named "colspec" x || named "col" x
- let colspecs = case filterChild (named "colgroup") e' of
- Just c -> filterChildren isColspec c
- _ -> filterChildren isColspec e'
- let isRow x = named "row" x || named "tr" x
- headrows <- case filterChild (named "thead") e' of
- Just h -> case filterChild isRow h of
- Just x -> parseRow x
- Nothing -> return []
- Nothing -> return []
- bodyrows <- case filterChild (named "tbody") e' of
- Just b -> mapM parseRow
- $ filterChildren isRow b
- Nothing -> mapM parseRow
- $ filterChildren isRow e'
- let toAlignment c = case findAttr (unqual "align") c of
- Just "left" -> AlignLeft
- Just "right" -> AlignRight
- Just "center" -> AlignCenter
- _ -> AlignDefault
- let toWidth c = case findAttr (unqual "colwidth") c of
- Just w -> fromMaybe 0
- $ safeRead $ '0': filter (\x ->
- (x >= '0' && x <= '9')
- || x == '.') w
- Nothing -> 0 :: Double
- let numrows = case bodyrows of
- [] -> 0
- xs -> maximum $ map length xs
- let aligns = case colspecs of
- [] -> replicate numrows AlignDefault
- cs -> map toAlignment cs
- let widths = case colspecs of
- [] -> replicate numrows 0
- cs -> let ws = map toWidth cs
- tot = sum ws
- in if all (> 0) ws
- then map (/ tot) ws
- else replicate numrows 0
- let headrows' = if null headrows
- then replicate numrows mempty
- else headrows
- return $ table caption (zip aligns widths)
- headrows' bodyrows
- isEntry x = named "entry" x || named "td" x || named "th" x
- parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
- sect n = do isbook <- gets dbBook
- let n' = if isbook || n == 0 then n + 1 else n
- headerText <- case filterChild (named "title") e `mplus`
- (filterChild (named "info") e >>=
- filterChild (named "title")) of
- Just t -> getInlines t
- Nothing -> return mempty
- modify $ \st -> st{ dbSectionLevel = n }
- b <- getBlocks e
- let ident = attrValue "id" e
- modify $ \st -> st{ dbSectionLevel = n - 1 }
- return $ headerWith (ident,[],[]) n' headerText <> b
- lineItems = mapM getInlines $ filterChildren (named "line") e
- metaBlock = acceptingMetadata (getBlocks e) >> return mempty
-
-getInlines :: PandocMonad m => Element -> DB m Inlines
-getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
-
-strContentRecursive :: Element -> String
-strContentRecursive = strContent .
- (\e' -> e'{ elContent = map elementToStr $ elContent e' })
-
-elementToStr :: Content -> Content
-elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
-elementToStr x = x
-
-parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text s
-parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref
-parseInline (Elem e) =
- case qName (elName e) of
- "equation" -> equation displayMath
- "informalequation" -> equation displayMath
- "inlineequation" -> equation math
- "subscript" -> subscript <$> innerInlines
- "superscript" -> superscript <$> innerInlines
- "inlinemediaobject" -> getMediaobject e
- "quote" -> do
- qt <- gets dbQuoteType
- let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
- modify $ \st -> st{ dbQuoteType = qt' }
- contents <- innerInlines
- modify $ \st -> st{ dbQuoteType = qt }
- return $ if qt == SingleQuote
- then singleQuoted contents
- else doubleQuoted contents
- "simplelist" -> simpleList
- "segmentedlist" -> segmentedList
- "classname" -> codeWithLang
- "code" -> codeWithLang
- "filename" -> codeWithLang
- "literal" -> codeWithLang
- "computeroutput" -> codeWithLang
- "prompt" -> codeWithLang
- "parameter" -> codeWithLang
- "option" -> codeWithLang
- "optional" -> do x <- getInlines e
- return $ str "[" <> x <> str "]"
- "markup" -> codeWithLang
- "wordasword" -> emph <$> innerInlines
- "command" -> codeWithLang
- "varname" -> codeWithLang
- "function" -> codeWithLang
- "type" -> codeWithLang
- "symbol" -> codeWithLang
- "constant" -> codeWithLang
- "userinput" -> codeWithLang
- "varargs" -> return $ code "(...)"
- "keycap" -> return (str $ strContent e)
- "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
- "menuchoice" -> menuchoice <$> (mapM parseInline $
- filter isGuiMenu $ elContent e)
- "xref" -> do
- content <- dbContent <$> get
- let linkend = attrValue "linkend" e
- let title = case attrValue "endterm" e of
- "" -> maybe "???" xrefTitleByElem
- (findElementById linkend content)
- endterm -> maybe "???" strContent
- (findElementById endterm content)
- return $ link ('#' : linkend) "" (text title)
- "email" -> return $ link ("mailto:" ++ strContent e) ""
- $ str $ strContent e
- "uri" -> return $ link (strContent e) "" $ str $ strContent e
- "ulink" -> link (attrValue "url" e) "" <$> innerInlines
- "link" -> do
- ils <- innerInlines
- let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> h
- _ -> ('#' : attrValue "linkend" e)
- let ils' = if ils == mempty then str href else ils
- let attr = (attrValue "id" e, words $ attrValue "role" e, [])
- return $ linkWith attr href "" ils'
- "foreignphrase" -> emph <$> innerInlines
- "emphasis" -> case attrValue "role" e of
- "bold" -> strong <$> innerInlines
- "strong" -> strong <$> innerInlines
- "strikethrough" -> strikeout <$> innerInlines
- _ -> emph <$> innerInlines
- "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
- "title" -> return mempty
- "affiliation" -> return mempty
- -- Note: this isn't a real docbook tag; it's what we convert
- -- <?asciidor-br?> to in handleInstructions, above. A kludge to
- -- work around xml-light's inability to parse an instruction.
- "br" -> return linebreak
- _ -> innerInlines
- where innerInlines = (trimInlines . mconcat) <$>
- (mapM parseInline $ elContent e)
- equation constructor = return $ mconcat $
- map (constructor . writeTeX)
- $ rights
- $ map (readMathML . showElement . everywhere (mkT removePrefix))
- $ filterChildren (\x -> qName (elName x) == "math" &&
- qPrefix (elName x) == Just "mml") e
- removePrefix elname = elname { qPrefix = Nothing }
- codeWithLang = do
- let classes' = case attrValue "language" e of
- "" -> []
- l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
- simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
- (filterChildren (named "member") e)
- segmentedList = do
- tit <- maybe (return mempty) getInlines $ filterChild (named "title") e
- segtits <- mapM getInlines $ filterChildren (named "segtitle") e
- segitems <- mapM (mapM getInlines . filterChildren (named "seg"))
- $ filterChildren (named "seglistitem") e
- let toSeg = mconcat . zipWith (\x y -> strong (x <> str ":") <> space <>
- y <> linebreak) segtits
- let segs = mconcat $ map toSeg segitems
- let tit' = if tit == mempty
- then mempty
- else strong tit <> linebreak
- return $ linebreak <> tit' <> segs
- keycombo = spanWith ("",["keycombo"],[]) .
- mconcat . intersperse (str "+")
- menuchoice = spanWith ("",["menuchoice"],[]) .
- mconcat . intersperse (text " > ")
- isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x ||
- named "guimenuitem" x
- isGuiMenu _ = False
-
- findElementById idString content
- = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content]
-
- -- Use the 'xreflabel' attribute for getting the title of a xref link;
- -- if there's no such attribute, employ some heuristics based on what
- -- docbook-xsl does.
- xrefTitleByElem el
- | not (null xrefLabel) = xrefLabel
- | otherwise = case qName (elName el) of
- "chapter" -> descendantContent "title" el
- "sect1" -> descendantContent "title" el
- "sect2" -> descendantContent "title" el
- "sect3" -> descendantContent "title" el
- "sect4" -> descendantContent "title" el
- "sect5" -> descendantContent "title" el
- "cmdsynopsis" -> descendantContent "command" el
- "funcsynopsis" -> descendantContent "function" el
- _ -> qName (elName el) ++ "_title"
- where
- xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" strContent
- . filterElementName (\n -> qName n == name)
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
deleted file mode 100644
index 8936a0403..000000000
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ /dev/null
@@ -1,626 +0,0 @@
-{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-}
-
-{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
-to 'Pandoc' document. -}
-
-{-
-Current state of implementation of Docx entities ([x] means
-implemented, [-] means partially implemented):
-
-* Blocks
-
- - [X] Para
- - [X] CodeBlock (styled with `SourceCode`)
- - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
- indented)
- - [X] OrderedList
- - [X] BulletList
- - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
- - [X] Header (styled with `Heading#`)
- - [ ] HorizontalRule
- - [-] Table (column widths and alignments not yet implemented)
-
-* Inlines
-
- - [X] Str
- - [X] Emph (italics and underline both read as Emph)
- - [X] Strong
- - [X] Strikeout
- - [X] Superscript
- - [X] Subscript
- - [X] SmallCaps
- - [ ] Quoted
- - [ ] Cite
- - [X] Code (styled with `VerbatimChar`)
- - [X] Space
- - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
- - [X] Math
- - [X] Link (links to an arbitrary bookmark create a span with the target as
- id and "anchor" class)
- - [X] Image
- - [X] Note (Footnotes and Endnotes are silently combined.)
--}
-
-module Text.Pandoc.Readers.Docx
- ( readDocxWithWarnings
- , readDocx
- ) where
-
-import Codec.Archive.Zip
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Builder
-import Text.Pandoc.Walk
-import Text.Pandoc.Readers.Docx.Parse
-import Text.Pandoc.Readers.Docx.Lists
-import Text.Pandoc.Readers.Docx.Combine
-import Text.Pandoc.Shared
-import Text.Pandoc.MediaBag (MediaBag)
-import Data.List (delete, intersect)
-import Text.TeXMath (writeTeX)
-import Data.Default (Default)
-import qualified Data.ByteString.Lazy as B
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import Control.Monad.Reader
-import Control.Monad.State
-import Data.Sequence (ViewL(..), viewl)
-import qualified Data.Sequence as Seq (null)
-#if !(MIN_VERSION_base(4,8,0))
-import Data.Traversable (traverse)
-#endif
-import Text.Pandoc.Error
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad)
-import qualified Text.Pandoc.Class as P
-import Text.Pandoc.Logging
-
-readDocx :: PandocMonad m
- => ReaderOptions
- -> B.ByteString
- -> m Pandoc
-readDocx opts bytes
- | Right archive <- toArchiveOrFail bytes
- , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
- mapM_ (P.report . DocxParserWarning) parserWarnings
- (meta, blks) <- docxToOutput opts docx
- return $ Pandoc meta blks
-readDocx _ _ =
- throwError $ PandocSomeError "couldn't parse docx file"
-
--- TODO remove this for 2.0:
-readDocxWithWarnings :: PandocMonad m
- => ReaderOptions
- -> B.ByteString
- -> m Pandoc
-readDocxWithWarnings = readDocx
-
-data DState = DState { docxAnchorMap :: M.Map String String
- , docxMediaBag :: MediaBag
- , docxDropCap :: Inlines
- , docxWarnings :: [String]
- }
-
-instance Default DState where
- def = DState { docxAnchorMap = M.empty
- , docxMediaBag = mempty
- , docxDropCap = mempty
- , docxWarnings = []
- }
-
-data DEnv = DEnv { docxOptions :: ReaderOptions
- , docxInHeaderBlock :: Bool }
-
-instance Default DEnv where
- def = DEnv def False
-
-type DocxContext m = ReaderT DEnv (StateT DState m)
-
-evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
-evalDocxContext ctx env st = flip evalStateT st $ flip runReaderT env $ ctx
-
--- This is empty, but we put it in for future-proofing.
-spansToKeep :: [String]
-spansToKeep = []
-
-divsToKeep :: [String]
-divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
-
-metaStyles :: M.Map String String
-metaStyles = M.fromList [ ("Title", "title")
- , ("Subtitle", "subtitle")
- , ("Author", "author")
- , ("Date", "date")
- , ("Abstract", "abstract")]
-
-sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
-sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
-
-isMetaPar :: BodyPart -> Bool
-isMetaPar (Paragraph pPr _) =
- not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
-isMetaPar _ = False
-
-isEmptyPar :: BodyPart -> Bool
-isEmptyPar (Paragraph _ parParts) =
- all isEmptyParPart parParts
- where
- isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
- isEmptyParPart _ = False
- isEmptyElem (TextRun s) = trim s == ""
- isEmptyElem _ = True
-isEmptyPar _ = False
-
-bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
-bodyPartsToMeta' [] = return M.empty
-bodyPartsToMeta' (bp : bps)
- | (Paragraph pPr parParts) <- bp
- , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
- , (Just metaField) <- M.lookup c metaStyles = do
- inlines <- smushInlines <$> mapM parPartToInlines parParts
- remaining <- bodyPartsToMeta' bps
- let
- f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
- f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
- f m (MetaList mv) = MetaList (m : mv)
- f m n = MetaList [m, n]
- return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
-bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
-
-bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
-bodyPartsToMeta bps = do
- mp <- bodyPartsToMeta' bps
- let mp' =
- case M.lookup "author" mp of
- Just mv -> M.insert "author" (fixAuthors mv) mp
- Nothing -> mp
- return $ Meta mp'
-
-fixAuthors :: MetaValue -> MetaValue
-fixAuthors (MetaBlocks blks) =
- MetaList $ map g $ filter f blks
- where f (Para _) = True
- f _ = False
- g (Para ils) = MetaInlines ils
- g _ = MetaInlines []
-fixAuthors mv = mv
-
-codeStyles :: [String]
-codeStyles = ["VerbatimChar"]
-
-codeDivs :: [String]
-codeDivs = ["SourceCode"]
-
-runElemToInlines :: RunElem -> Inlines
-runElemToInlines (TextRun s) = text s
-runElemToInlines (LnBrk) = linebreak
-runElemToInlines (Tab) = space
-runElemToInlines (SoftHyphen) = text "\xad"
-runElemToInlines (NoBreakHyphen) = text "\x2011"
-
-runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString (LnBrk) = ['\n']
-runElemToString (Tab) = ['\t']
-runElemToString (SoftHyphen) = ['\xad']
-runElemToString (NoBreakHyphen) = ['\x2011']
-
-runToString :: Run -> String
-runToString (Run _ runElems) = concatMap runElemToString runElems
-runToString _ = ""
-
-parPartToString :: ParPart -> String
-parPartToString (PlainRun run) = runToString run
-parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
-parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
-parPartToString _ = ""
-
-blacklistedCharStyles :: [String]
-blacklistedCharStyles = ["Hyperlink"]
-
-resolveDependentRunStyle :: RunStyle -> RunStyle
-resolveDependentRunStyle rPr
- | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
- rPr
- | Just (_, cs) <- rStyle rPr =
- let rPr' = resolveDependentRunStyle cs
- in
- RunStyle { isBold = case isBold rPr of
- Just bool -> Just bool
- Nothing -> isBold rPr'
- , isItalic = case isItalic rPr of
- Just bool -> Just bool
- Nothing -> isItalic rPr'
- , isSmallCaps = case isSmallCaps rPr of
- Just bool -> Just bool
- Nothing -> isSmallCaps rPr'
- , isStrike = case isStrike rPr of
- Just bool -> Just bool
- Nothing -> isStrike rPr'
- , rVertAlign = case rVertAlign rPr of
- Just valign -> Just valign
- Nothing -> rVertAlign rPr'
- , rUnderline = case rUnderline rPr of
- Just ulstyle -> Just ulstyle
- Nothing -> rUnderline rPr'
- , rStyle = rStyle rPr }
- | otherwise = rPr
-
-runStyleToTransform :: RunStyle -> (Inlines -> Inlines)
-runStyleToTransform rPr
- | Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep =
- let rPr' = rPr{rStyle = Nothing}
- in
- (spanWith ("", [s], [])) . (runStyleToTransform rPr')
- | Just True <- isItalic rPr =
- emph . (runStyleToTransform rPr {isItalic = Nothing})
- | Just True <- isBold rPr =
- strong . (runStyleToTransform rPr {isBold = Nothing})
- | Just True <- isSmallCaps rPr =
- smallcaps . (runStyleToTransform rPr {isSmallCaps = Nothing})
- | Just True <- isStrike rPr =
- strikeout . (runStyleToTransform rPr {isStrike = Nothing})
- | Just SupScrpt <- rVertAlign rPr =
- superscript . (runStyleToTransform rPr {rVertAlign = Nothing})
- | Just SubScrpt <- rVertAlign rPr =
- subscript . (runStyleToTransform rPr {rVertAlign = Nothing})
- | Just "single" <- rUnderline rPr =
- emph . (runStyleToTransform rPr {rUnderline = Nothing})
- | otherwise = id
-
-runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
-runToInlines (Run rs runElems)
- | Just (s, _) <- rStyle rs
- , s `elem` codeStyles =
- let rPr = resolveDependentRunStyle rs
- codeString = code $ concatMap runElemToString runElems
- in
- return $ case rVertAlign rPr of
- Just SupScrpt -> superscript codeString
- Just SubScrpt -> subscript codeString
- _ -> codeString
- | otherwise = do
- let ils = smushInlines (map runElemToInlines runElems)
- return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
-runToInlines (Footnote bps) = do
- blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
- return $ note blksList
-runToInlines (Endnote bps) = do
- blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
- return $ note blksList
-runToInlines (InlineDrawing fp title alt bs ext) = do
- (lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
-runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-
-extentToAttr :: Extent -> Attr
-extentToAttr (Just (w, h)) =
- ("", [], [("width", showDim w), ("height", showDim h)] )
- where
- showDim d = show (d / 914400) ++ "in"
-extentToAttr _ = nullAttr
-
-blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
-blocksToInlinesWarn cmtId blks = do
- let blkList = toList blks
- notParaOrPlain :: Block -> Bool
- notParaOrPlain (Para _) = False
- notParaOrPlain (Plain _) = False
- notParaOrPlain _ = True
- when (not $ null $ filter notParaOrPlain blkList) $
- lift $ P.report $ DocxParserWarning $
- "Docx comment " ++ cmtId ++ " will not retain formatting"
- return $ fromList $ blocksToInlines blkList
-
-parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
-parPartToInlines (PlainRun r) = runToInlines r
-parPartToInlines (Insertion _ author date runs) = do
- opts <- asks docxOptions
- case readerTrackChanges opts of
- AcceptChanges -> smushInlines <$> mapM runToInlines runs
- RejectChanges -> return mempty
- AllChanges -> do
- ils <- smushInlines <$> mapM runToInlines runs
- let attr = ("", ["insertion"], [("author", author), ("date", date)])
- return $ spanWith attr ils
-parPartToInlines (Deletion _ author date runs) = do
- opts <- asks docxOptions
- case readerTrackChanges opts of
- AcceptChanges -> return mempty
- RejectChanges -> smushInlines <$> mapM runToInlines runs
- AllChanges -> do
- ils <- smushInlines <$> mapM runToInlines runs
- let attr = ("", ["deletion"], [("author", author), ("date", date)])
- return $ spanWith attr ils
-parPartToInlines (CommentStart cmtId author date bodyParts) = do
- opts <- asks docxOptions
- case readerTrackChanges opts of
- AllChanges -> do
- blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
- ils <- blocksToInlinesWarn cmtId blks
- let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
- return $ spanWith attr ils
- _ -> return mempty
-parPartToInlines (CommentEnd cmtId) = do
- opts <- asks docxOptions
- case readerTrackChanges opts of
- AllChanges -> do
- let attr = ("", ["comment-end"], [("id", cmtId)])
- return $ spanWith attr mempty
- _ -> return mempty
-parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
- return mempty
-parPartToInlines (BookMark _ anchor) =
- -- We record these, so we can make sure not to overwrite
- -- user-defined anchor links with header auto ids.
- do
- -- get whether we're in a header.
- inHdrBool <- asks docxInHeaderBlock
- -- Get the anchor map.
- anchorMap <- gets docxAnchorMap
- -- We don't want to rewrite if we're in a header, since we'll take
- -- care of that later, when we make the header anchor. If the
- -- bookmark were already in uniqueIdent form, this would lead to a
- -- duplication. Otherwise, we check to see if the id is already in
- -- there. Rewrite if necessary. This will have the possible effect
- -- of rewriting user-defined anchor links. However, since these
- -- are not defined in pandoc, it seems like a necessary evil to
- -- avoid an extra pass.
- let newAnchor =
- if not inHdrBool && anchor `elem` (M.elems anchorMap)
- then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
- else anchor
- unless inHdrBool
- (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
- return $ spanWith (newAnchor, ["anchor"], []) mempty
-parPartToInlines (Drawing fp title alt bs ext) = do
- (lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
-parPartToInlines Chart = do
- return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-parPartToInlines (InternalHyperLink anchor runs) = do
- ils <- smushInlines <$> mapM runToInlines runs
- return $ link ('#' : anchor) "" ils
-parPartToInlines (ExternalHyperLink target runs) = do
- ils <- smushInlines <$> mapM runToInlines runs
- return $ link target "" ils
-parPartToInlines (PlainOMath exps) = do
- return $ math $ writeTeX exps
-parPartToInlines (SmartTag runs) = do
- ils <- smushInlines <$> mapM runToInlines runs
- return ils
-
-isAnchorSpan :: Inline -> Bool
-isAnchorSpan (Span (_, classes, kvs) _) =
- classes == ["anchor"] &&
- null kvs
-isAnchorSpan _ = False
-
-dummyAnchors :: [String]
-dummyAnchors = ["_GoBack"]
-
-makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
-makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
-
-makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
--- If there is an anchor already there (an anchor span in the header,
--- to be exact), we rename and associate the new id with the old one.
-makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
- | (c:_) <- filter isAnchorSpan ils
- , (Span (anchIdent, ["anchor"], _) cIls) <- c = do
- hdrIDMap <- gets docxAnchorMap
- let newIdent = if null ident
- then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
- else ident
- newIls = concatMap f ils where f il | il == c = cIls
- | otherwise = [il]
- modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
- makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
--- Otherwise we just give it a name, and register that name (associate
--- it with itself.)
-makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
- do
- hdrIDMap <- gets docxAnchorMap
- let newIdent = if null ident
- then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
- else ident
- modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
- return $ Header n (newIdent, classes, kvs) ils
-makeHeaderAnchor' blk = return blk
-
--- Rewrite a standalone paragraph block as a plain
-singleParaToPlain :: Blocks -> Blocks
-singleParaToPlain blks
- | (Para (ils) :< seeq) <- viewl $ unMany blks
- , Seq.null seeq =
- singleton $ Plain ils
-singleParaToPlain blks = blks
-
-cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
-cellToBlocks (Cell bps) = do
- blks <- smushBlocks <$> mapM bodyPartToBlocks bps
- return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
-
-rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
-rowToBlocksList (Row cells) = do
- blksList <- mapM cellToBlocks cells
- return $ map singleParaToPlain blksList
-
-trimLineBreaks :: [Inline] -> [Inline]
-trimLineBreaks [] = []
-trimLineBreaks (LineBreak : ils) = trimLineBreaks ils
-trimLineBreaks ils
- | (LineBreak : ils') <- reverse ils = trimLineBreaks (reverse ils')
-trimLineBreaks ils = ils
-
-parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks)
-parStyleToTransform pPr
- | (c:cs) <- pStyle pPr
- , c `elem` divsToKeep =
- let pPr' = pPr { pStyle = cs }
- in
- (divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs =
- let pPr' = pPr { pStyle = cs, indentation = Nothing}
- in
- (divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (_:cs) <- pStyle pPr
- , Just True <- pBlockQuote pPr =
- let pPr' = pPr { pStyle = cs }
- in
- blockQuote . (parStyleToTransform pPr')
- | (_:cs) <- pStyle pPr =
- let pPr' = pPr { pStyle = cs}
- in
- parStyleToTransform pPr'
- | null (pStyle pPr)
- , Just left <- indentation pPr >>= leftParIndent
- , Just hang <- indentation pPr >>= hangingParIndent =
- let pPr' = pPr { indentation = Nothing }
- in
- case (left - hang) > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
- | null (pStyle pPr),
- Just left <- indentation pPr >>= leftParIndent =
- let pPr' = pPr { indentation = Nothing }
- in
- case left > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
-parStyleToTransform _ = id
-
-bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
-bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) =
- return
- $ parStyleToTransform pPr
- $ codeBlock
- $ concatMap parPartToString parparts
- | Just (style, n) <- pHeading pPr = do
- ils <- local (\s-> s{docxInHeaderBlock=True}) $
- (smushInlines <$> mapM parPartToInlines parparts)
- makeHeaderAnchor $
- headerWith ("", delete style (pStyle pPr), []) n ils
- | otherwise = do
- ils <- smushInlines <$> mapM parPartToInlines parparts >>=
- (return . fromList . trimLineBreaks . normalizeSpaces . toList)
- dropIls <- gets docxDropCap
- let ils' = dropIls <> ils
- if dropCap pPr
- then do modify $ \s -> s { docxDropCap = ils' }
- return mempty
- else do modify $ \s -> s { docxDropCap = mempty }
- return $ case isNull ils' of
- True -> mempty
- _ -> parStyleToTransform pPr $ para ils'
-bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
- let
- kvs = case levelInfo of
- (_, fmt, txt, Just start) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- , ("start", (show start))
- ]
-
- (_, fmt, txt, Nothing) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- ]
- blks <- bodyPartToBlocks (Paragraph pPr parparts)
- return $ divWith ("", ["list-item"], kvs) blks
-bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
- let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)}
- in
- bodyPartToBlocks $ Paragraph pPr' parparts
-bodyPartToBlocks (Tbl _ _ _ []) =
- return $ para mempty
-bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
- let caption = text cap
- (hdr, rows) = case firstRowFormatting look of
- True | null rs -> (Nothing, [r])
- | otherwise -> (Just r, rs)
- False -> (Nothing, r:rs)
-
- cells <- mapM rowToBlocksList rows
-
- let width = case cells of
- r':_ -> length r'
- -- shouldn't happen
- [] -> 0
-
- hdrCells <- case hdr of
- Just r' -> rowToBlocksList r'
- Nothing -> return $ replicate width mempty
-
- -- The two following variables (horizontal column alignment and
- -- relative column widths) go to the default at the
- -- moment. Width information is in the TblGrid field of the Tbl,
- -- so should be possible. Alignment might be more difficult,
- -- since there doesn't seem to be a column entity in docx.
- let alignments = replicate width AlignDefault
- widths = replicate width 0 :: [Double]
-
- return $ table caption (zip alignments widths) hdrCells cells
-bodyPartToBlocks (OMathPara e) = do
- return $ para $ displayMath (writeTeX e)
-
-
--- replace targets with generated anchors.
-rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
-rewriteLink' l@(Link attr ils ('#':target, title)) = do
- anchorMap <- gets docxAnchorMap
- return $ case M.lookup target anchorMap of
- Just newTarget -> (Link attr ils ('#':newTarget, title))
- Nothing -> l
-rewriteLink' il = return il
-
-rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
-rewriteLinks = mapM (walkM rewriteLink')
-
-bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
-bodyToOutput (Body bps) = do
- let (metabps, blkbps) = sepBodyParts bps
- meta <- bodyPartsToMeta metabps
- blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
- blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
- return $ (meta, blks')
-
-docxToOutput :: PandocMonad m
- => ReaderOptions
- -> Docx
- -> m (Meta, [Block])
-docxToOutput opts (Docx (Document _ body)) =
- let dEnv = def { docxOptions = opts} in
- evalDocxContext (bodyToOutput body) dEnv def
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
deleted file mode 100644
index 39e0df825..000000000
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ /dev/null
@@ -1,154 +0,0 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- PatternGuards #-}
-
-module Text.Pandoc.Readers.Docx.Combine ( smushInlines
- , smushBlocks
- )
- where
-
-import Text.Pandoc.Builder
-import Data.List
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
-import qualified Data.Sequence as Seq (null)
-
-data Modifier a = Modifier (a -> a)
- | AttrModifier (Attr -> a -> a) Attr
- | NullModifier
-
-spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
-spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
-
-spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
-spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
-
-spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
-spaceOutInlines ils =
- let (fs, ils') = unstackInlines ils
- contents = unMany ils'
- left = case viewl contents of
- (Space :< _) -> space
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> space
- _ -> mempty in
- (left, (stackInlines fs $ trimInlines . Many $ contents), right)
-
-stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
-stackInlines [] ms = ms
-stackInlines (NullModifier : fs) ms = stackInlines fs ms
-stackInlines ((Modifier f) : fs) ms =
- if isEmpty ms
- then stackInlines fs ms
- else f $ stackInlines fs ms
-stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
-
-unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
-unstackInlines ms = case ilModifier ms of
- NullModifier -> ([], ms)
- _ -> (f : fs, ms') where
- f = ilModifier ms
- (fs, ms') = unstackInlines $ ilInnards ms
-
-ilModifier :: Inlines -> Modifier Inlines
-ilModifier ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
- (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
- _ -> NullModifier
-
-ilInnards :: Inlines -> Inlines
-ilInnards ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph lst) -> fromList lst
- (Strong lst) -> fromList lst
- (SmallCaps lst) -> fromList lst
- (Strikeout lst) -> fromList lst
- (Superscript lst) -> fromList lst
- (Subscript lst) -> fromList lst
- (Link _ lst _) -> fromList lst
- (Span _ lst) -> fromList lst
- _ -> ils
- _ -> ils
-
-inlinesL :: Inlines -> (Inlines, Inlines)
-inlinesL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
- _ -> (mempty, ils)
-
-inlinesR :: Inlines -> (Inlines, Inlines)
-inlinesR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
- _ -> (ils, mempty)
-
-combineInlines :: Inlines -> Inlines -> Inlines
-combineInlines x y =
- let (xs', x') = inlinesR x
- (y', ys') = inlinesL y
- in
- xs' <> (combineSingletonInlines x' y') <> ys'
-
-combineSingletonInlines :: Inlines -> Inlines -> Inlines
-combineSingletonInlines x y =
- let (xfs, xs) = unstackInlines x
- (yfs, ys) = unstackInlines y
- shared = xfs `intersect` yfs
- x_remaining = xfs \\ shared
- y_remaining = yfs \\ shared
- x_rem_attr = filter isAttrModifier x_remaining
- y_rem_attr = filter isAttrModifier y_remaining
- in
- case null shared of
- True | isEmpty xs && isEmpty ys ->
- stackInlines (x_rem_attr ++ y_rem_attr) mempty
- | isEmpty xs ->
- let (sp, y') = spaceOutInlinesL y in
- (stackInlines x_rem_attr mempty) <> sp <> y'
- | isEmpty ys ->
- let (x', sp) = spaceOutInlinesR x in
- x' <> sp <> (stackInlines y_rem_attr mempty)
- | otherwise ->
- let (x', xsp) = spaceOutInlinesR x
- (ysp, y') = spaceOutInlinesL y
- in
- x' <> xsp <> ysp <> y'
- False -> stackInlines shared $
- combineInlines
- (stackInlines x_remaining xs)
- (stackInlines y_remaining ys)
-
-combineBlocks :: Blocks -> Blocks -> Blocks
-combineBlocks bs cs
- | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
- , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
- Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
-combineBlocks bs cs = bs <> cs
-
-instance (Monoid a, Eq a) => Eq (Modifier a) where
- (Modifier f) == (Modifier g) = (f mempty == g mempty)
- (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
- (NullModifier) == (NullModifier) = True
- _ == _ = False
-
-isEmpty :: (Monoid a, Eq a) => a -> Bool
-isEmpty x = x == mempty
-
-isAttrModifier :: Modifier a -> Bool
-isAttrModifier (AttrModifier _ _) = True
-isAttrModifier _ = False
-
-smushInlines :: [Inlines] -> Inlines
-smushInlines xs = foldl combineInlines mempty xs
-
-smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
deleted file mode 100644
index 395a53907..000000000
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ /dev/null
@@ -1,229 +0,0 @@
-{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx.Lists
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Functions for converting flat docx paragraphs into nested lists.
--}
-
-module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
- , blocksToDefinitions
- , listParagraphDivs
- ) where
-
-import Text.Pandoc.JSON
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.Shared (trim)
-import Control.Monad
-import Data.List
-import Data.Maybe
-
-isListItem :: Block -> Bool
-isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
-isListItem _ = False
-
-getLevel :: Block -> Maybe Integer
-getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs
-getLevel _ = Nothing
-
-getLevelN :: Block -> Integer
-getLevelN b = case getLevel b of
- Just n -> n
- Nothing -> -1
-
-getNumId :: Block -> Maybe Integer
-getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs
-getNumId _ = Nothing
-
-getNumIdN :: Block -> Integer
-getNumIdN b = case getNumId b of
- Just n -> n
- Nothing -> -1
-
-getText :: Block -> Maybe String
-getText (Div (_, _, kvs) _) = lookup "text" kvs
-getText _ = Nothing
-
-data ListType = Itemized | Enumerated ListAttributes
-
-listStyleMap :: [(String, ListNumberStyle)]
-listStyleMap = [("upperLetter", UpperAlpha),
- ("lowerLetter", LowerAlpha),
- ("upperRoman", UpperRoman),
- ("lowerRoman", LowerRoman),
- ("decimal", Decimal)]
-
-listDelimMap :: [(String, ListNumberDelim)]
-listDelimMap = [("%1)", OneParen),
- ("(%1)", TwoParens),
- ("%1.", Period)]
-
-getListType :: Block -> Maybe ListType
-getListType b@(Div (_, _, kvs) _) | isListItem b =
- let
- start = lookup "start" kvs
- frmt = lookup "format" kvs
- txt = lookup "text" kvs
- in
- case frmt of
- Just "bullet" -> Just Itemized
- Just f ->
- case txt of
- Just t -> Just $ Enumerated (
- read (fromMaybe "1" start) :: Int,
- fromMaybe DefaultStyle (lookup f listStyleMap),
- fromMaybe DefaultDelim (lookup t listDelimMap))
- Nothing -> Nothing
- _ -> Nothing
-getListType _ = Nothing
-
-listParagraphDivs :: [String]
-listParagraphDivs = ["ListParagraph"]
-
--- This is a first stab at going through and attaching meaning to list
--- paragraphs, without an item marker, following a list item. We
--- assume that these are paragraphs in the same item.
-
-handleListParagraphs :: [Block] -> [Block]
-handleListParagraphs [] = []
-handleListParagraphs (
- (Div attr1@(_, classes1, _) blks1) :
- (Div (ident2, classes2, kvs2) blks2) :
- blks
- ) | "list-item" `elem` classes1 &&
- not ("list-item" `elem` classes2) &&
- (not . null) (listParagraphDivs `intersect` classes2) =
- -- We don't want to keep this indent.
- let newDiv2 =
- (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
- in
- handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
-handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
-
-separateBlocks' :: Block -> [[Block]] -> [[Block]]
-separateBlocks' blk ([] : []) = [[blk]]
-separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
--- The following is for the invisible bullet lists. This is how
--- pandoc-generated ooxml does multiparagraph item lists.
-separateBlocks' b acc | liftM trim (getText b) == Just "" =
- (init acc) ++ [(last acc) ++ [b]]
-separateBlocks' b acc = acc ++ [[b]]
-
-separateBlocks :: [Block] -> [[Block]]
-separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
-
-flatToBullets' :: Integer -> [Block] -> [Block]
-flatToBullets' _ [] = []
-flatToBullets' num xs@(b : elems)
- | getLevelN b == num = b : (flatToBullets' num elems)
- | otherwise =
- let bNumId = getNumIdN b
- bLevel = getLevelN b
- (children, remaining) =
- span
- (\b' ->
- ((getLevelN b') > bLevel ||
- ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
- xs
- in
- case getListType b of
- Just (Enumerated attr) ->
- (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
- _ ->
- (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
- (flatToBullets' num remaining)
-
-flatToBullets :: [Block] -> [Block]
-flatToBullets elems = flatToBullets' (-1) elems
-
-singleItemHeaderToHeader :: Block -> Block
-singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
-singleItemHeaderToHeader blk = blk
-
-
-blocksToBullets :: [Block] -> [Block]
-blocksToBullets blks =
- map singleItemHeaderToHeader $
- bottomUp removeListDivs $
- flatToBullets $ (handleListParagraphs blks)
-
-plainParaInlines :: Block -> [Inline]
-plainParaInlines (Plain ils) = ils
-plainParaInlines (Para ils) = ils
-plainParaInlines _ = []
-
-blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
-blocksToDefinitions' [] acc [] = reverse acc
-blocksToDefinitions' defAcc acc [] =
- reverse $ (DefinitionList (reverse defAcc)) : acc
-blocksToDefinitions' defAcc acc
- ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
- | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
- let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- pair = case remainingAttr2 == ("", [], []) of
- True -> (concatMap plainParaInlines blks1, [blks2])
- False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
- in
- blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
- ((Div (ident2, classes2, kvs2) blks2) : blks)
- | (not . null) defAcc && "Definition" `elem` classes2 =
- let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
- defItems2 = case remainingAttr2 == ("", [], []) of
- True -> blks2
- False -> [Div remainingAttr2 blks2]
- ((defTerm, defItems):defs) = defAcc
- defAcc' = case null defItems of
- True -> (defTerm, [defItems2]) : defs
- False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
- in
- blocksToDefinitions' defAcc' acc blks
-blocksToDefinitions' [] acc (b:blks) =
- blocksToDefinitions' [] (b:acc) blks
-blocksToDefinitions' defAcc acc (b:blks) =
- blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
-
-removeListDivs' :: Block -> [Block]
-removeListDivs' (Div (ident, classes, kvs) blks)
- | "list-item" `elem` classes =
- case delete "list-item" classes of
- [] -> blks
- classes' -> [Div (ident, classes', kvs) $ blks]
-removeListDivs' (Div (ident, classes, kvs) blks)
- | not $ null $ listParagraphDivs `intersect` classes =
- case classes \\ listParagraphDivs of
- [] -> blks
- classes' -> [Div (ident, classes', kvs) blks]
-removeListDivs' blk = [blk]
-
-removeListDivs :: [Block] -> [Block]
-removeListDivs = concatMap removeListDivs'
-
-
-
-blocksToDefinitions :: [Block] -> [Block]
-blocksToDefinitions = blocksToDefinitions' [] []
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
deleted file mode 100644
index 221a1d10a..000000000
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ /dev/null
@@ -1,1044 +0,0 @@
-{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
-
-{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Docx.Parse
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
- License : GNU GPL, version 2 or above
-
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of docx archive into Docx haskell type
--}
-
-module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
- , Document(..)
- , Body(..)
- , BodyPart(..)
- , TblLook(..)
- , Extent
- , ParPart(..)
- , Run(..)
- , RunElem(..)
- , Notes
- , Numbering
- , Relationship
- , Media
- , RunStyle(..)
- , VertAlign(..)
- , ParIndentation(..)
- , ParagraphStyle(..)
- , Row(..)
- , Cell(..)
- , archiveToDocx
- , archiveToDocxWithWarnings
- ) where
-import Codec.Archive.Zip
-import Text.XML.Light
-import Data.Maybe
-import Data.List
-import System.FilePath
-import Data.Bits ((.|.))
-import qualified Data.ByteString.Lazy as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad.Reader
-import Control.Monad.State
-import Control.Applicative ((<|>))
-import qualified Data.Map as M
-import Control.Monad.Except
-import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive)
-import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..))
-import Text.TeXMath (Exp)
-import Text.Pandoc.Readers.Docx.Util
-import Data.Char (readLitChar, ord, chr, isDigit)
-
-data ReaderEnv = ReaderEnv { envNotes :: Notes
- , envComments :: Comments
- , envNumbering :: Numbering
- , envRelationships :: [Relationship]
- , envMedia :: Media
- , envFont :: Maybe Font
- , envCharStyles :: CharStyleMap
- , envParStyles :: ParStyleMap
- , envLocation :: DocumentLocation
- }
- deriving Show
-
-data ReaderState = ReaderState { stateWarnings :: [String] }
- deriving Show
-
-data DocxError = DocxError | WrongElem
- deriving Show
-
-type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
-
-runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState)
-runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
-
-maybeToD :: Maybe a -> D a
-maybeToD (Just a) = return a
-maybeToD Nothing = throwError DocxError
-
-eitherToD :: Either a b -> D b
-eitherToD (Right b) = return b
-eitherToD (Left _) = throwError DocxError
-
-concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
-concatMapM f xs = liftM concat (mapM f xs)
-
-
--- This is similar to `mapMaybe`: it maps a function returning the D
--- monad over a list, and only keeps the non-erroring return values.
-mapD :: (a -> D b) -> [a] -> D [b]
-mapD f xs =
- let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return [])
- in
- concatMapM handler xs
-
-data Docx = Docx Document
- deriving Show
-
-data Document = Document NameSpaces Body
- deriving Show
-
-data Body = Body [BodyPart]
- deriving Show
-
-type Media = [(FilePath, B.ByteString)]
-
-type CharStyle = (String, RunStyle)
-
-type ParStyle = (String, ParStyleData)
-
-type CharStyleMap = M.Map String RunStyle
-
-type ParStyleMap = M.Map String ParStyleData
-
-data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
- deriving Show
-
-data Numb = Numb String String -- right now, only a key to an abstract num
- deriving Show
-
-data AbstractNumb = AbstractNumb String [Level]
- deriving Show
-
--- (ilvl, format, string, start)
-type Level = (String, String, String, Maybe Integer)
-
-data DocumentLocation = InDocument | InFootnote | InEndnote
- deriving (Eq,Show)
-
-data Relationship = Relationship DocumentLocation RelId Target
- deriving Show
-
-data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
- deriving Show
-
-data Comments = Comments NameSpaces (M.Map String Element)
- deriving Show
-
-data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
- , rightParIndent :: Maybe Integer
- , hangingParIndent :: Maybe Integer}
- deriving Show
-
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
- , indentation :: Maybe ParIndentation
- , dropCap :: Bool
- , pHeading :: Maybe (String, Int)
- , pNumInfo :: Maybe (String, String)
- , pBlockQuote :: Maybe Bool
- }
- deriving Show
-
-defaultParagraphStyle :: ParagraphStyle
-defaultParagraphStyle = ParagraphStyle { pStyle = []
- , indentation = Nothing
- , dropCap = False
- , pHeading = Nothing
- , pNumInfo = Nothing
- , pBlockQuote = Nothing
- }
-
-
-data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
- | OMathPara [Exp]
- deriving Show
-
-type TblGrid = [Integer]
-
-data TblLook = TblLook {firstRowFormatting::Bool}
- deriving Show
-
-defaultTblLook :: TblLook
-defaultTblLook = TblLook{firstRowFormatting = False}
-
-data Row = Row [Cell]
- deriving Show
-
-data Cell = Cell [BodyPart]
- deriving Show
-
--- (width, height) in EMUs
-type Extent = Maybe (Double, Double)
-
-data ParPart = PlainRun Run
- | Insertion ChangeId Author ChangeDate [Run]
- | Deletion ChangeId Author ChangeDate [Run]
- | CommentStart CommentId Author CommentDate [BodyPart]
- | CommentEnd CommentId
- | BookMark BookMarkId Anchor
- | InternalHyperLink Anchor [Run]
- | ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
- | Chart -- placeholder for now
- | PlainOMath [Exp]
- | SmartTag [Run]
- deriving Show
-
-data Run = Run RunStyle [RunElem]
- | Footnote [BodyPart]
- | Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
- | InlineChart -- placeholder
- deriving Show
-
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
- deriving Show
-
-data VertAlign = BaseLn | SupScrpt | SubScrpt
- deriving Show
-
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
- , isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle}
- deriving Show
-
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
- , isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
- deriving Show
-
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = Nothing
- , isItalic = Nothing
- , isSmallCaps = Nothing
- , isStrike = Nothing
- , rVertAlign = Nothing
- , rUnderline = Nothing
- , rStyle = Nothing}
-
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
-
-archiveToDocx :: Archive -> Either DocxError Docx
-archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
-archiveToDocxWithWarnings archive = do
- let notes = archiveToNotes archive
- comments = archiveToComments archive
- numbering = archiveToNumbering archive
- rels = archiveToRelationships archive
- media = filteredFilesFromArchive archive filePathIsMedia
- (styles, parstyles) = archiveToStyles archive
- rEnv =
- ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
- (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
- case eitherDoc of
- Right doc -> Right (Docx doc, stateWarnings st)
- Left e -> Left e
-
-
-
-archiveToDocument :: Archive -> D Document
-archiveToDocument zf = do
- entry <- maybeToD $ findEntryByPath "word/document.xml" zf
- docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = elemToNameSpaces docElem
- bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
- body <- elemToBody namespaces bodyElem
- return $ Document namespaces body
-
-elemToBody :: NameSpaces -> Element -> D Body
-elemToBody ns element | isElem ns "w" "body" element =
- mapD (elemToBodyPart ns) (elChildren element) >>=
- (\bps -> return $ Body bps)
-elemToBody _ _ = throwError WrongElem
-
-archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
-archiveToStyles zf =
- let stylesElem = findEntryByPath "word/styles.xml" zf >>=
- (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- in
- case stylesElem of
- Nothing -> (M.empty, M.empty)
- Just styElem ->
- let namespaces = elemToNameSpaces styElem
- in
- ( M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe CharStyle),
- M.fromList $ buildBasedOnList namespaces styElem
- (Nothing :: Maybe ParStyle) )
-
-isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
-isBasedOnStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
- , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
- | isElem ns "w" "style" element
- , Just styleType <- findAttrByName ns "w" "type" element
- , styleType == cStyleType parentStyle
- , Nothing <- findChildByName ns "w" "basedOn" element
- , Nothing <- parentStyle = True
- | otherwise = False
-
-class ElemToStyle a where
- cStyleType :: Maybe a -> String
- elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
- getStyleId :: a -> String
-
-instance ElemToStyle CharStyle where
- cStyleType _ = "character"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToRunStyle ns element parentStyle)
- | otherwise = Nothing
- getStyleId s = fst s
-
-instance ElemToStyle ParStyle where
- cStyleType _ = "paragraph"
- elemToStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "paragraph" <- findAttrByName ns "w" "type" element
- , Just styleId <- findAttrByName ns "w" "styleId" element =
- Just (styleId, elemToParStyleData ns element parentStyle)
- | otherwise = Nothing
- getStyleId s = fst s
-
-getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-getStyleChildren ns element parentStyle
- | isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToStyle ns e parentStyle) $
- filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
- | otherwise = []
-
-buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
-buildBasedOnList ns element rootStyle =
- case (getStyleChildren ns element rootStyle) of
- [] -> []
- stys -> stys ++
- (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
-
-archiveToNotes :: Archive -> Notes
-archiveToNotes zf =
- let fnElem = findEntryByPath "word/footnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- enElem = findEntryByPath "word/endnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- fn_namespaces = case fnElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- en_namespaces = case enElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= (elemToNotes ns "footnote")
- en = enElem >>= (elemToNotes ns "endnote")
- in
- Notes ns fn en
-
-archiveToComments :: Archive -> Comments
-archiveToComments zf =
- let cmtsElem = findEntryByPath "word/comments.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- cmts_namespaces = case cmtsElem of
- Just e -> elemToNameSpaces e
- Nothing -> []
- cmts = (elemToComments cmts_namespaces) <$> cmtsElem
- in
- case cmts of
- Just c -> Comments cmts_namespaces c
- Nothing -> Comments cmts_namespaces M.empty
-
-filePathToRelType :: FilePath -> Maybe DocumentLocation
-filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
-filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
-filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
-filePathToRelType _ = Nothing
-
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
- do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
-
-filePathToRelationships :: Archive -> FilePath -> [Relationship]
-filePathToRelationships ar fp | Just relType <- filePathToRelType fp
- , Just entry <- findEntryByPath fp ar
- , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
- mapMaybe (relElemToRelationship relType) $ elChildren relElems
-filePathToRelationships _ _ = []
-
-archiveToRelationships :: Archive -> [Relationship]
-archiveToRelationships archive =
- concatMap (filePathToRelationships archive) $ filesInArchive archive
-
-filePathIsMedia :: FilePath -> Bool
-filePathIsMedia fp =
- let (dir, _) = splitFileName fp
- in
- (dir == "word/media/")
-
-lookupLevel :: String -> String -> Numbering -> Maybe Level
-lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
- absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
- lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
- lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
- return lvl
-
-
-numElemToNum :: NameSpaces -> Element -> Maybe Numb
-numElemToNum ns element
- | isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
- absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
- return $ Numb numId absNumId
-numElemToNum _ _ = Nothing
-
-absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
-absNumElemToAbsNum ns element
- | isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
- let levelElems = findChildrenByName ns "w" "lvl" element
- levels = mapMaybe (levelElemToLevel ns) levelElems
- return $ AbstractNumb absNumId levels
-absNumElemToAbsNum _ _ = Nothing
-
-levelElemToLevel :: NameSpaces -> Element -> Maybe Level
-levelElemToLevel ns element
- | isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
- fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
- txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
- let start = findChildByName ns "w" "start" element
- >>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
- return (ilvl, fmt, txt, start)
-levelElemToLevel _ _ = Nothing
-
-archiveToNumbering' :: Archive -> Maybe Numbering
-archiveToNumbering' zf = do
- case findEntryByPath "word/numbering.xml" zf of
- Nothing -> Just $ Numbering [] [] []
- Just entry -> do
- numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = elemToNameSpaces numberingElem
- numElems = findChildrenByName namespaces "w" "num" numberingElem
- absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
- nums = mapMaybe (numElemToNum namespaces) numElems
- absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
- return $ Numbering namespaces nums absNums
-
-archiveToNumbering :: Archive -> Numbering
-archiveToNumbering archive =
- fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
-elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
- let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
- (\a -> Just (a, e)))
- (findChildrenByName ns "w" notetype element)
- in
- Just $ M.fromList $ pairs
-elemToNotes _ _ _ = Nothing
-
-elemToComments :: NameSpaces -> Element -> M.Map String Element
-elemToComments ns element
- | isElem ns "w" "comments" element =
- let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
- (\a -> Just (a, e)))
- (findChildrenByName ns "w" "comment" element)
- in
- M.fromList $ pairs
-elemToComments _ _ = M.empty
-
-
----------------------------------------------
----------------------------------------------
-
-elemToTblGrid :: NameSpaces -> Element -> D TblGrid
-elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
- let cols = findChildrenByName ns "w" "gridCol" element
- in
- mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
- cols
-elemToTblGrid _ _ = throwError WrongElem
-
-elemToTblLook :: NameSpaces -> Element -> D TblLook
-elemToTblLook ns element | isElem ns "w" "tblLook" element =
- let firstRow = findAttrByName ns "w" "firstRow" element
- val = findAttrByName ns "w" "val" element
- firstRowFmt =
- case firstRow of
- Just "1" -> True
- Just _ -> False
- Nothing -> case val of
- Just bitMask -> testBitMask bitMask 0x020
- Nothing -> False
- in
- return $ TblLook{firstRowFormatting = firstRowFmt}
-elemToTblLook _ _ = throwError WrongElem
-
-elemToRow :: NameSpaces -> Element -> D Row
-elemToRow ns element | isElem ns "w" "tr" element =
- do
- let cellElems = findChildrenByName ns "w" "tc" element
- cells <- mapD (elemToCell ns) cellElems
- return $ Row cells
-elemToRow _ _ = throwError WrongElem
-
-elemToCell :: NameSpaces -> Element -> D Cell
-elemToCell ns element | isElem ns "w" "tc" element =
- do
- cellContents <- mapD (elemToBodyPart ns) (elChildren element)
- return $ Cell cellContents
-elemToCell _ _ = throwError WrongElem
-
-elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
-elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just $ ParIndentation {
- leftParIndent =
- findAttrByName ns "w" "left" element >>=
- stringToInteger
- , rightParIndent =
- findAttrByName ns "w" "right" element >>=
- stringToInteger
- , hangingParIndent =
- findAttrByName ns "w" "hanging" element >>=
- stringToInteger}
-elemToParIndentation _ _ = Nothing
-
-testBitMask :: String -> Int -> Bool
-testBitMask bitMaskS n =
- case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
- [] -> False
- ((n', _) : _) -> ((n' .|. n) /= 0)
-
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
-
-elemToBodyPart :: NameSpaces -> Element -> D BodyPart
-elemToBodyPart ns element
- | isElem ns "w" "p" element
- , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
- do
- expsLst <- eitherToD $ readOMML $ showElement c
- return $ OMathPara expsLst
-elemToBodyPart ns element
- | isElem ns "w" "p" element
- , Just (numId, lvl) <- getNumInfo ns element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
- return $ ListItem parstyle numId lvl levelInfo parparts
-elemToBodyPart ns element
- | isElem ns "w" "p" element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- -- Word uses list enumeration for numbered headings, so we only
- -- want to infer a list from the styles if it is NOT a heading.
- case pHeading parstyle of
- Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
- num <- asks envNumbering
- let levelInfo = lookupLevel numId lvl num
- return $ ListItem parstyle numId lvl levelInfo parparts
- _ -> return $ Paragraph parstyle parparts
-elemToBodyPart ns element
- | isElem ns "w" "tbl" element = do
- let caption' = findChildByName ns "w" "tblPr" element
- >>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
- caption = (fromMaybe "" caption')
- grid' = case findChildByName ns "w" "tblGrid" element of
- Just g -> elemToTblGrid ns g
- Nothing -> return []
- tblLook' = case findChildByName ns "w" "tblPr" element >>=
- findChildByName ns "w" "tblLook"
- of
- Just l -> elemToTblLook ns l
- Nothing -> return defaultTblLook
-
- grid <- grid'
- tblLook <- tblLook'
- rows <- mapD (elemToRow ns) (elChildren element)
- return $ Tbl caption grid tblLook rows
-elemToBodyPart _ _ = throwError WrongElem
-
-lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
-lookupRelationship docLocation relid rels =
- lookup (docLocation, relid) pairs
- where
- pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-
-expandDrawingId :: String -> D (FilePath, B.ByteString)
-expandDrawingId s = do
- location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
- case target of
- Just filepath -> do
- bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
- case bytes of
- Just bs -> return (filepath, bs)
- Nothing -> throwError DocxError
- Nothing -> throwError DocxError
-
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
-getTitleAndAlt ns element =
- let mbDocPr = findChildByName ns "wp" "inline" element >>=
- findChildByName ns "wp" "docPr"
- title = case mbDocPr >>= findAttrByName ns "" "title" of
- Just title' -> title'
- Nothing -> ""
- alt = case mbDocPr >>= findAttrByName ns "" "descr" of
- Just alt' -> alt'
- Nothing -> ""
- in (title, alt)
-
-elemToParPart :: NameSpaces -> Element -> D ParPart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
- , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
- = let (title, alt) = getTitleAndAlt ns drawingElem
- a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
- in
- case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
- Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just _ <- findChildByName ns "w" "pict" element =
- let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
- in
- case drawing of
- -- Todo: check out title and attr for deprecated format.
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
- Nothing -> throwError WrongElem
--- Chart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
- , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
-elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
-elemToParPart ns element
- | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Insertion cId cAuthor cDate runs
-elemToParPart ns element
- | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Deletion cId cAuthor cDate runs
-elemToParPart ns element
- | isElem ns "w" "smartTag" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ SmartTag runs
-elemToParPart ns element
- | isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
- return $ BookMark bmId bmName
-elemToParPart ns element
- | isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
- location <- asks envLocation
- runs <- mapD (elemToRun ns) (elChildren element)
- rels <- asks envRelationships
- case lookupRelationship location relId rels of
- Just target -> do
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
- Nothing -> return $ ExternalHyperLink target runs
- Nothing -> return $ ExternalHyperLink "" runs
-elemToParPart ns element
- | isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
-elemToParPart ns element
- | isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
- (Comments _ commentMap) <- asks envComments
- case M.lookup cmtId commentMap of
- Just cmtElem -> elemToCommentStart ns cmtElem
- Nothing -> throwError WrongElem
-elemToParPart ns element
- | isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
- return $ CommentEnd cmtId
-elemToParPart ns element
- | isElem ns "m" "oMath" element =
- (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
-elemToParPart _ _ = throwError WrongElem
-
-elemToCommentStart :: NameSpaces -> Element -> D ParPart
-elemToCommentStart ns element
- | isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
- bps <- mapD (elemToBodyPart ns) (elChildren element)
- return $ CommentStart cmtId cmtAuthor cmtDate bps
-elemToCommentStart _ _ = throwError WrongElem
-
-lookupFootnote :: String -> Notes -> Maybe Element
-lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
-
-lookupEndnote :: String -> Notes -> Maybe Element
-lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
-
-elemToExtent :: Element -> Extent
-elemToExtent drawingElem =
- case (getDim "cx", getDim "cy") of
- (Just w, Just h) -> Just (w, h)
- _ -> Nothing
- where
- wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
- getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
-
-
-childElemToRun :: NameSpaces -> Element -> D Run
-childElemToRun ns element
- | isElem ns "w" "drawing" element
- , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
- , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) element
- = let (title, alt) = getTitleAndAlt ns element
- a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
- in
- case drawing of
- Just s -> expandDrawingId s >>=
- (\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element)
- Nothing -> throwError WrongElem
-childElemToRun ns element
- | isElem ns "w" "drawing" element
- , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
- , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
- = return InlineChart
-childElemToRun ns element
- | isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
- notes <- asks envNotes
- case lookupFootnote fnId notes of
- Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
- return $ Footnote bps
- Nothing -> return $ Footnote []
-childElemToRun ns element
- | isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
- notes <- asks envNotes
- case lookupEndnote enId notes of
- Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
- return $ Endnote bps
- Nothing -> return $ Endnote []
-childElemToRun _ _ = throwError WrongElem
-
-elemToRun :: NameSpaces -> Element -> D Run
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
- do let choices = findChildrenByName ns "mc" "Choice" altCont
- choiceChildren = map head $ filter (not . null) $ map elChildren choices
- outputs <- mapD (childElemToRun ns) choiceChildren
- case outputs of
- r : _ -> return r
- [] -> throwError WrongElem
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element =
- childElemToRun ns drawingElem
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChildByName ns "w" "footnoteReference" element =
- childElemToRun ns ref
-elemToRun ns element
- | isElem ns "w" "r" element
- , Just ref <- findChildByName ns "w" "endnoteReference" element =
- childElemToRun ns ref
-elemToRun ns element
- | isElem ns "w" "r" element = do
- runElems <- elemToRunElems ns element
- runStyle <- elemToRunStyleD ns element
- return $ Run runStyle runElems
-elemToRun _ _ = throwError WrongElem
-
-getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
-getParentStyleValue field style
- | Just value <- field style = Just value
- | Just parentStyle <- psStyle style
- = getParentStyleValue field (snd parentStyle)
-getParentStyleValue _ _ = Nothing
-
-getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
- Maybe a
-getParStyleField field stylemap styles
- | x <- mapMaybe (\x -> M.lookup x stylemap) styles
- , (y:_) <- mapMaybe (getParentStyleValue field) x
- = Just y
-getParStyleField _ _ _ = Nothing
-
-elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
-elemToParagraphStyle ns element sty
- | Just pPr <- findChildByName ns "w" "pPr" element =
- let style =
- mapMaybe
- (findAttrByName ns "w" "val")
- (findChildrenByName ns "w" "pStyle" pPr)
- in ParagraphStyle
- {pStyle = style
- , indentation =
- findChildByName ns "w" "ind" pPr >>=
- elemToParIndentation ns
- , dropCap =
- case
- findChildByName ns "w" "framePr" pPr >>=
- findAttrByName ns "w" "dropCap"
- of
- Just "none" -> False
- Just _ -> True
- Nothing -> False
- , pHeading = getParStyleField headingLev sty style
- , pNumInfo = getParStyleField numInfo sty style
- , pBlockQuote = getParStyleField isBlockQuote sty style
- }
-elemToParagraphStyle _ _ _ = defaultParagraphStyle
-
-checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
-checkOnOff ns rPr tag
- | Just t <- findChild tag rPr
- , Just val <- findAttrByName ns "w" "val" t =
- Just $ case val of
- "true" -> True
- "false" -> False
- "on" -> True
- "off" -> False
- "1" -> True
- "0" -> False
- _ -> False
- | Just _ <- findChild tag rPr = Just True
-checkOnOff _ _ _ = Nothing
-
-elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
-elemToRunStyleD ns element
- | Just rPr <- findChildByName ns "w" "rPr" element = do
- charStyles <- asks envCharStyles
- let parentSty = case
- findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val"
- of
- Just styName | Just style <- M.lookup styName charStyles ->
- Just (styName, style)
- _ -> Nothing
- return $ elemToRunStyle ns element parentSty
-elemToRunStyleD _ _ = return defaultRunStyle
-
-elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
-elemToRunStyle ns element parentStyle
- | Just rPr <- findChildByName ns "w" "rPr" element =
- RunStyle
- {
- isBold = checkOnOff ns rPr (elemName ns "w" "b")
- , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
- , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
- , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
- , rVertAlign =
- findChildByName ns "w" "vertAlign" rPr >>=
- findAttrByName ns "w" "val" >>=
- \v -> Just $ case v of
- "superscript" -> SupScrpt
- "subscript" -> SubScrpt
- _ -> BaseLn
- , rUnderline =
- findChildByName ns "w" "u" rPr >>=
- findAttrByName ns "w" "val"
- , rStyle = parentStyle
- }
-elemToRunStyle _ _ _ = defaultRunStyle
-
-isNumericNotNull :: String -> Bool
-isNumericNotNull str = (str /= []) && (all isDigit str)
-
-getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
-getHeaderLevel ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- stripPrefix "Heading" styleId
- , isNumericNotNull index = Just (styleId, read index)
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , Just index <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val" >>=
- stripPrefix "heading "
- , isNumericNotNull index = Just (styleId, read index)
-getHeaderLevel _ _ = Nothing
-
-blockQuoteStyleIds :: [String]
-blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
-
-blockQuoteStyleNames :: [String]
-blockQuoteStyleNames = ["Quote", "Block Text"]
-
-getBlockQuote :: NameSpaces -> Element -> Maybe Bool
-getBlockQuote ns element
- | Just styleId <- findAttrByName ns "w" "styleId" element
- , styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChildByName ns "w" "name" element >>=
- findAttrByName ns "w" "val"
- , styleName `elem` blockQuoteStyleNames = Just True
-getBlockQuote _ _ = Nothing
-
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-getNumInfo ns element = do
- let numPr = findChildByName ns "w" "pPr" element >>=
- findChildByName ns "w" "numPr"
- lvl = fromMaybe "0" (numPr >>=
- findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
- numId <- numPr >>=
- findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
- return (numId, lvl)
-
-
-elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
-elemToParStyleData ns element parentStyle =
- ParStyleData
- {
- headingLev = getHeaderLevel ns element
- , isBlockQuote = getBlockQuote ns element
- , numInfo = getNumInfo ns element
- , psStyle = parentStyle
- }
-
-elemToRunElem :: NameSpaces -> Element -> D RunElem
-elemToRunElem ns element
- | isElem ns "w" "t" element
- || isElem ns "w" "delText" element
- || isElem ns "m" "t" element = do
- let str = strContent element
- font <- asks envFont
- case font of
- Nothing -> return $ TextRun str
- Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
- | isElem ns "w" "br" element = return LnBrk
- | isElem ns "w" "tab" element = return Tab
- | isElem ns "w" "softHyphen" element = return SoftHyphen
- | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
- | isElem ns "w" "sym" element = return (getSymChar ns element)
- | otherwise = throwError WrongElem
- where
- lowerFromPrivate (ord -> c)
- | c >= ord '\xF000' = chr $ c - ord '\xF000'
- | otherwise = chr c
-
--- The char attribute is a hex string
-getSymChar :: NameSpaces -> Element -> RunElem
-getSymChar ns element
- | Just s <- lowerFromPrivate <$> getCodepoint
- , Just font <- getFont =
- let [(char, _)] = readLitChar ("\\x" ++ s) in
- TextRun . maybe "" (:[]) $ getUnicode font char
- where
- getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
- lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
-getSymChar _ _ = TextRun ""
-
-elemToRunElems :: NameSpaces -> Element -> D [RunElem]
-elemToRunElems ns element
- | isElem ns "w" "r" element
- || isElem ns "m" "r" element = do
- let qualName = elemName ns "w"
- let font = do
- fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
- (foldr (<|>) Nothing $
- map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
- local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
-elemToRunElems _ _ = throwError WrongElem
-
-setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
-setFont f s = s{envFont = f}
-
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
deleted file mode 100644
index 00906cf07..000000000
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ /dev/null
@@ -1,108 +0,0 @@
-module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
- , alterMap
- , getMap
- , defaultStyleMaps
- , getStyleMaps
- , getStyleId
- , hasStyleName
- ) where
-
-import Text.XML.Light
-import Text.Pandoc.Readers.Docx.Util
-import Control.Monad.State
-import Data.Char (toLower)
-import qualified Data.Map as M
-
-newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
-newtype CharStyleMap = CharStyleMap ( M.Map String String )
-
-class StyleMap a where
- alterMap :: (M.Map String String -> M.Map String String) -> a -> a
- getMap :: a -> M.Map String String
-
-instance StyleMap ParaStyleMap where
- alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
- getMap (ParaStyleMap m) = m
-
-instance StyleMap CharStyleMap where
- alterMap f (CharStyleMap m) = CharStyleMap $ f m
- getMap (CharStyleMap m) = m
-
-insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
-insert (Just k) (Just v) m = alterMap (M.insert k v) m
-insert _ _ m = m
-
-getStyleId :: (StyleMap a) => String -> a -> String
-getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
-
-hasStyleName :: (StyleMap a) => String -> a -> Bool
-hasStyleName styleName = M.member (map toLower styleName) . getMap
-
-data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
- , sParaStyleMap :: ParaStyleMap
- , sCharStyleMap :: CharStyleMap
- }
-
-data StyleType = ParaStyle | CharStyle
-
-defaultStyleMaps :: StyleMaps
-defaultStyleMaps = StyleMaps { sNameSpaces = []
- , sParaStyleMap = ParaStyleMap M.empty
- , sCharStyleMap = CharStyleMap M.empty
- }
-
-type StateM a = State StyleMaps a
-
-getStyleMaps :: Element -> StyleMaps
-getStyleMaps docElem = execState genStyleMap state'
- where
- state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
- genStyleItem e = do
- styleType <- getStyleType e
- styleId <- getAttrStyleId e
- nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
- case styleType of
- Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
- Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
- _ -> return ()
- genStyleMap = do
- style <- elemName' "style"
- let styles = findChildren style docElem
- forM_ styles genStyleItem
-
-modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
-modParaStyleMap f = modify $ \s ->
- s {sParaStyleMap = f $ sParaStyleMap s}
-
-modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
-modCharStyleMap f = modify $ \s ->
- s {sCharStyleMap = f $ sCharStyleMap s}
-
-getStyleType :: Element -> StateM (Maybe StyleType)
-getStyleType e = do
- styleTypeStr <- getAttrType e
- case styleTypeStr of
- Just "paragraph" -> return $ Just ParaStyle
- Just "character" -> return $ Just CharStyle
- _ -> return Nothing
-
-getAttrType :: Element -> StateM (Maybe String)
-getAttrType el = do
- name <- elemName' "type"
- return $ findAttr name el
-
-getAttrStyleId :: Element -> StateM (Maybe String)
-getAttrStyleId el = do
- name <- elemName' "styleId"
- return $ findAttr name el
-
-getNameVal :: Element -> StateM (Maybe String)
-getNameVal el = do
- name <- elemName' "name"
- val <- elemName' "val"
- return $ findChild name el >>= findAttr val
-
-elemName' :: String -> StateM QName
-elemName' name = do
- namespaces <- gets sNameSpaces
- return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
deleted file mode 100644
index 6646e5b7f..000000000
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ /dev/null
@@ -1,47 +0,0 @@
-module Text.Pandoc.Readers.Docx.Util (
- NameSpaces
- , elemName
- , isElem
- , elemToNameSpaces
- , findChildByName
- , findChildrenByName
- , findAttrByName
- ) where
-
-import Text.XML.Light
-import Data.Maybe (mapMaybe)
-
-type NameSpaces = [(String, String)]
-
-elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name =
- QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- let ns' = ns ++ elemToNameSpaces element
- in qName (elName element) == name &&
- qURI (elName element) == lookup prefix ns'
-
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
-findChildByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findChild (elemName ns' pref name) el
-
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
-findChildrenByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findChildren (elemName ns' pref name) el
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
-findAttrByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
- in findAttr (elemName ns' pref name) el
-
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
deleted file mode 100644
index 2eaa842b6..000000000
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ /dev/null
@@ -1,279 +0,0 @@
-{-# LANGUAGE
- ViewPatterns
- , StandaloneDeriving
- , TupleSections
- , FlexibleContexts #-}
-
-module Text.Pandoc.Readers.EPUB
- (readEPUB)
- where
-
-import Text.XML.Light
-import Text.Pandoc.Definition hiding (Attr)
-import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Walk (walk, query)
-import Text.Pandoc.Options ( ReaderOptions(..))
-import Text.Pandoc.Extensions (enableExtension, Extension(Ext_raw_html))
-import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
-import Network.URI (unEscapeString)
-import Text.Pandoc.MediaBag (MediaBag, insertMedia)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.MIME (MimeType)
-import qualified Text.Pandoc.Builder as B
-import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry
- , findEntryByPath, Entry)
-import qualified Data.ByteString.Lazy as BL (ByteString)
-import System.FilePath ( takeFileName, (</>), dropFileName, normalise
- , dropFileName
- , splitFileName )
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
-import Control.Monad (guard, liftM)
-import Data.List (isPrefixOf, isInfixOf)
-import Data.Maybe (mapMaybe, fromMaybe)
-import qualified Data.Map as M (Map, lookup, fromList, elems)
-import Data.Monoid ((<>))
-import Control.DeepSeq (deepseq, NFData)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad)
-import qualified Text.Pandoc.Class as P
-
-type Items = M.Map String (FilePath, MimeType)
-
-readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
-readEPUB opts bytes = case toArchiveOrFail bytes of
- Right archive -> archiveToEPUB opts $ archive
- Left _ -> throwError $ PandocParseError "Couldn't extract ePub file"
-
--- runEPUB :: Except PandocError a -> Either PandocError a
--- runEPUB = runExcept
-
--- Note that internal reference are aggresively normalised so that all ids
--- are of the form "filename#id"
---
-archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
-archiveToEPUB os archive = do
- -- root is path to folder with manifest file in
- (root, content) <- getManifest archive
- meta <- parseMeta content
- (cover, items) <- parseManifest content
- -- No need to collapse here as the image path is from the manifest file
- let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
- spine <- parseSpine items content
- let escapedSpine = map (escapeURI . takeFileName . fst) spine
- Pandoc _ bs <-
- foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
- `liftM` parseSpineElem root b) mempty spine
- let ast = coverDoc <> (Pandoc meta bs)
- P.setMediaBag $ fetchImages (M.elems items) root archive ast
- return ast
- where
- os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)}
- parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
- parseSpineElem (normalise -> r) (normalise -> path, mime) = do
- doc <- mimeToReader mime r path
- let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
- return $ docSpan <> doc
- mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
- mimeToReader "application/xhtml+xml" (unEscapeString -> root)
- (unEscapeString -> path) = do
- fname <- findEntryByPathE (root </> path) archive
- html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname
- return $ fixInternalReferences path html
- mimeToReader s _ (unEscapeString -> path)
- | s `elem` imageMimes = return $ imageToPandoc path
- | otherwise = return $ mempty
-
--- paths should be absolute when this function is called
--- renameImages should do this
-fetchImages :: [(FilePath, MimeType)]
- -> FilePath -- ^ Root
- -> Archive
- -> Pandoc
- -> MediaBag
-fetchImages mimes root arc (query iq -> links) =
- foldr (uncurry3 insertMedia) mempty
- (mapMaybe getEntry links)
- where
- getEntry link =
- let abslink = normalise (root </> link) in
- (link , lookup link mimes, ) . fromEntry
- <$> findEntryByPath abslink arc
-
-iq :: Inline -> [FilePath]
-iq (Image _ _ (url, _)) = [url]
-iq _ = []
-
--- Remove relative paths
-renameImages :: FilePath -> Inline -> Inline
-renameImages root img@(Image attr a (url, b))
- | "data:" `isPrefixOf` url = img
- | otherwise = Image attr a (collapseFilePath (root </> url), b)
-renameImages _ x = x
-
-imageToPandoc :: FilePath -> Pandoc
-imageToPandoc s = B.doc . B.para $ B.image s "" mempty
-
-imageMimes :: [MimeType]
-imageMimes = ["image/gif", "image/jpeg", "image/png"]
-
-type CoverImage = FilePath
-
-parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items)
-parseManifest content = do
- manifest <- findElementE (dfName "manifest") content
- let items = findChildren (dfName "item") manifest
- r <- mapM parseItem items
- let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
- return (cover, (M.fromList r))
- where
- findCover e = maybe False (isInfixOf "cover-image")
- (findAttr (emptyName "properties") e)
- parseItem e = do
- uid <- findAttrE (emptyName "id") e
- href <- findAttrE (emptyName "href") e
- mime <- findAttrE (emptyName "media-type") e
- return (uid, (href, mime))
-
-parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
-parseSpine is e = do
- spine <- findElementE (dfName "spine") e
- let itemRefs = findChildren (dfName "itemref") spine
- mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs
- where
- parseItemRef ref = do
- let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
- guard linear
- findAttr (emptyName "idref") ref
-
-parseMeta :: PandocMonad m => Element -> m Meta
-parseMeta content = do
- meta <- findElementE (dfName "metadata") content
- let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
- dcspace _ = False
- let dcs = filterChildrenName dcspace meta
- let r = foldr parseMetaItem nullMeta dcs
- return r
-
--- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
-parseMetaItem :: Element -> Meta -> Meta
-parseMetaItem e@(stripNamespace . elName -> field) meta =
- addMetaField (renameMeta field) (B.str $ strContent e) meta
-
-renameMeta :: String -> String
-renameMeta "creator" = "author"
-renameMeta s = s
-
-getManifest :: PandocMonad m => Archive -> m (String, Element)
-getManifest archive = do
- metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
- docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
- ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
- as <- liftM ((map attrToPair) . elAttribs)
- (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
- manifestFile <- mkE "Root not found" (lookup "full-path" as)
- let rootdir = dropFileName manifestFile
- --mime <- lookup "media-type" as
- manifest <- findEntryByPathE manifestFile archive
- liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
-
--- Fixup
-
-fixInternalReferences :: FilePath -> Pandoc -> Pandoc
-fixInternalReferences pathToFile =
- (walk $ renameImages root)
- . (walk $ fixBlockIRs filename)
- . (walk $ fixInlineIRs filename)
- where
- (root, escapeURI -> filename) = splitFileName pathToFile
-
-fixInlineIRs :: String -> Inline -> Inline
-fixInlineIRs s (Span as v) =
- Span (fixAttrs s as) v
-fixInlineIRs s (Code as code) =
- Code (fixAttrs s as) code
-fixInlineIRs s (Link as is ('#':url, tit)) =
- Link (fixAttrs s as) is (addHash s url, tit)
-fixInlineIRs s (Link as is t) =
- Link (fixAttrs s as) is t
-fixInlineIRs _ v = v
-
-prependHash :: [String] -> Inline -> Inline
-prependHash ps l@(Link attr is (url, tit))
- | or [s `isPrefixOf` url | s <- ps] =
- Link attr is ('#':url, tit)
- | otherwise = l
-prependHash _ i = i
-
-fixBlockIRs :: String -> Block -> Block
-fixBlockIRs s (Div as b) =
- Div (fixAttrs s as) b
-fixBlockIRs s (Header i as b) =
- Header i (fixAttrs s as) b
-fixBlockIRs s (CodeBlock as code) =
- CodeBlock (fixAttrs s as) code
-fixBlockIRs _ b = b
-
-fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
-
-addHash :: String -> String -> String
-addHash _ "" = ""
-addHash s ident = takeFileName s ++ "#" ++ ident
-
-removeEPUBAttrs :: [(String, String)] -> [(String, String)]
-removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
-
-isEPUBAttr :: (String, String) -> Bool
-isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
-
--- Library
-
--- Strict version of foldM
-foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
-foldM' _ z [] = return z
-foldM' f z (x:xs) = do
- z' <- f z x
- z' `deepseq` foldM' f z' xs
-
-uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
-uncurry3 f (a, b, c) = f a b c
-
--- Utility
-
-stripNamespace :: QName -> String
-stripNamespace (QName v _ _) = v
-
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
-attrToNSPair _ = Nothing
-
-attrToPair :: Attr -> (String, String)
-attrToPair (Attr (QName name _ _) val) = (name, val)
-
-defaultNameSpace :: Maybe String
-defaultNameSpace = Just "http://www.idpf.org/2007/opf"
-
-dfName :: String -> QName
-dfName s = QName s defaultNameSpace Nothing
-
-emptyName :: String -> QName
-emptyName s = QName s Nothing Nothing
-
--- Convert Maybe interface to Either
-
-findAttrE :: PandocMonad m => QName -> Element -> m String
-findAttrE q e = mkE "findAttr" $ findAttr q e
-
-findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
-findEntryByPathE (normalise -> path) a =
- mkE ("No entry on path: " ++ path) $ findEntryByPath path a
-
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
-
-findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
-
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
deleted file mode 100644
index f02f1a1d4..000000000
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ /dev/null
@@ -1,1136 +0,0 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
-ViewPatterns#-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of HTML to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.HTML ( readHtml
- , htmlTag
- , htmlInBalanced
- , isInlineTag
- , isBlockTag
- , isTextTag
- , isCommentTag
- ) where
-
-import Text.HTML.TagSoup
-import Text.HTML.TagSoup.Match
-import Text.Pandoc.Definition
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
-import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
- , escapeURI, safeRead )
-import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
- Extension (Ext_epub_html_exts,
- Ext_raw_html, Ext_native_divs, Ext_native_spans))
-import Text.Pandoc.Logging
-import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Walk
-import qualified Data.Map as M
-import Data.Maybe ( fromMaybe, isJust)
-import Data.List ( intercalate, isInfixOf, isPrefixOf )
-import Data.Char ( isDigit )
-import Control.Monad ( guard, mzero, void, unless )
-import Control.Arrow ((***))
-import Control.Applicative ( (<|>) )
-import Data.Monoid (First (..))
-import Text.TeXMath (readMathML, writeTeX)
-import Data.Default (Default (..), def)
-import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift)
-import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
-import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
-import Data.Monoid ((<>))
-import Text.Parsec.Error
-import qualified Data.Set as Set
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import Control.Monad.Except (throwError)
-
--- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> m Pandoc
-readHtml opts inp = do
- let tags = stripPrefixes . canonicalizeTags $
- parseTagsOptions parseOptions{ optTagPosition = True } inp
- parseDoc = do
- blocks <- (fixPlains False) . mconcat <$> manyTill block eof
- meta <- stateMeta . parserState <$> getState
- bs' <- replaceNotes (B.toList blocks)
- return $ Pandoc meta bs'
- getError (errorMessages -> ms) = case ms of
- [] -> ""
- (m:_) -> messageString m
- result <- flip runReaderT def $
- runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
- "source" tags
- case result of
- Right doc -> return doc
- Left err -> throwError $ PandocParseError $ getError err
-
-replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
-replaceNotes = walkM replaceNotes'
-
-replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline
-replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
- where
- getNotes = noteTable <$> getState
-replaceNotes' x = return x
-
-data HTMLState =
- HTMLState
- { parserState :: ParserState,
- noteTable :: [(String, Blocks)],
- baseHref :: Maybe URI,
- identifiers :: Set.Set String,
- headerMap :: M.Map Inlines String
- }
-
-data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
- , inChapter :: Bool -- ^ Set if in chapter section
- , inPlain :: Bool -- ^ Set if in pPlain
- }
-
-setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
-setInChapter = local (\s -> s {inChapter = True})
-
-setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
-setInPlain = local (\s -> s {inPlain = True})
-
-type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
-
-type TagParser m = HTMLParser m [Tag String]
-
-pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
-
-pHead :: PandocMonad m => TagParser m Blocks
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
- where pTitle = pInTags "title" inline >>= setTitle . trimInlines
- setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
- pMetaTag = do
- mt <- pSatisfy (~== TagOpen "meta" [])
- let name = fromAttrib "name" mt
- if null name
- then return mempty
- else do
- let content = fromAttrib "content" mt
- updateState $ \s ->
- let ps = parserState s in
- s{ parserState = ps{
- stateMeta = addMetaField name (B.text content)
- (stateMeta ps) } }
- return mempty
- pBaseTag = do
- bt <- pSatisfy (~== TagOpen "base" [])
- updateState $ \st -> st{ baseHref =
- parseURIReference $ fromAttrib "href" bt }
- return mempty
-
-block :: PandocMonad m => TagParser m Blocks
-block = do
- pos <- getPosition
- res <- choice
- [ eSection
- , eSwitch B.para block
- , mempty <$ eFootnote
- , mempty <$ eTOC
- , mempty <$ eTitlePage
- , pPara
- , pHeader
- , pBlockQuote
- , pCodeBlock
- , pList
- , pHrule
- , pTable
- , pHead
- , pBody
- , pDiv
- , pPlain
- , pRawHtmlBlock
- ]
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
- return res
-
-namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
-namespaces = [(mathMLNamespace, pMath True)]
-
-mathMLNamespace :: String
-mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
-
-eSwitch :: (PandocMonad m, Monoid a)
- => (Inlines -> a)
- -> TagParser m a
- -> TagParser m a
-eSwitch constructor parser = try $ do
- guardEnabled Ext_epub_html_exts
- pSatisfy (~== TagOpen "switch" [])
- cases <- getFirst . mconcat <$>
- manyTill (First <$> (eCase <* skipMany pBlank) )
- (lookAhead $ try $ pSatisfy (~== TagOpen "default" []))
- skipMany pBlank
- fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank)
- skipMany pBlank
- pSatisfy (~== TagClose "switch")
- return $ maybe fallback constructor cases
-
-eCase :: PandocMonad m => TagParser m (Maybe Inlines)
-eCase = do
- skipMany pBlank
- TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
- case (flip lookup namespaces) =<< lookup "required-namespace" attr of
- Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
- Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
-
-eFootnote :: PandocMonad m => TagParser m ()
-eFootnote = try $ do
- let notes = ["footnote", "rearnote"]
- guardEnabled Ext_epub_html_exts
- (TagOpen tag attr) <- lookAhead $ pAnyTag
- guard (maybe False (flip elem notes) (lookup "type" attr))
- let ident = fromMaybe "" (lookup "id" attr)
- content <- pInTags tag block
- addNote ident content
-
-addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
-addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
-
-eNoteref :: PandocMonad m => TagParser m Inlines
-eNoteref = try $ do
- guardEnabled Ext_epub_html_exts
- TagOpen tag attr <- lookAhead $ pAnyTag
- guard (maybe False (== "noteref") (lookup "type" attr))
- let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
- guard (not (null ident))
- pInTags tag block
- return $ B.rawInline "noteref" ident
-
--- Strip TOC if there is one, better to generate again
-eTOC :: PandocMonad m => TagParser m ()
-eTOC = try $ do
- guardEnabled Ext_epub_html_exts
- (TagOpen tag attr) <- lookAhead $ pAnyTag
- guard (maybe False (== "toc") (lookup "type" attr))
- void (pInTags tag block)
-
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
-pBulletList :: PandocMonad m => TagParser m Blocks
-pBulletList = try $ do
- pSatisfy (~== TagOpen "ul" [])
- let nonItem = pSatisfy (\t ->
- not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
- not (t ~== TagClose "ul"))
- -- note: if they have an <ol> or <ul> not in scope of a <li>,
- -- treat it as a list item, though it's not valid xhtml...
- skipMany nonItem
- items <- manyTill (pListItem nonItem) (pCloses "ul")
- return $ B.bulletList $ map (fixPlains True) items
-
-pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
-pListItem nonItem = do
- TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
- let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
- (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
-
-parseListStyleType :: String -> ListNumberStyle
-parseListStyleType "lower-roman" = LowerRoman
-parseListStyleType "upper-roman" = UpperRoman
-parseListStyleType "lower-alpha" = LowerAlpha
-parseListStyleType "upper-alpha" = UpperAlpha
-parseListStyleType "decimal" = Decimal
-parseListStyleType _ = DefaultStyle
-
-parseTypeAttr :: String -> ListNumberStyle
-parseTypeAttr "i" = LowerRoman
-parseTypeAttr "I" = UpperRoman
-parseTypeAttr "a" = LowerAlpha
-parseTypeAttr "A" = UpperAlpha
-parseTypeAttr "1" = Decimal
-parseTypeAttr _ = DefaultStyle
-
-pOrderedList :: PandocMonad m => TagParser m Blocks
-pOrderedList = try $ do
- TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
- let (start, style) = (sta', sty')
- where sta = fromMaybe "1" $
- lookup "start" attribs
- sta' = if all isDigit sta
- then read sta
- else 1
-
- pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
-
- typeAttr = fromMaybe "" $ lookup "type" attribs
- classAttr = fromMaybe "" $ lookup "class" attribs
- styleAttr = fromMaybe "" $ lookup "style" attribs
- listStyle = fromMaybe "" $ pickListStyle styleAttr
-
- sty' = foldOrElse DefaultStyle
- [ parseTypeAttr typeAttr
- , parseListStyleType classAttr
- , parseListStyleType listStyle
- ]
- let nonItem = pSatisfy (\t ->
- not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
- not (t ~== TagClose "ol"))
- -- note: if they have an <ol> or <ul> not in scope of a <li>,
- -- treat it as a list item, though it's not valid xhtml...
- skipMany nonItem
- items <- manyTill (pListItem nonItem) (pCloses "ol")
- return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
-
-pDefinitionList :: PandocMonad m => TagParser m Blocks
-pDefinitionList = try $ do
- pSatisfy (~== TagOpen "dl" [])
- items <- manyTill pDefListItem (pCloses "dl")
- return $ B.definitionList items
-
-pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks])
-pDefListItem = try $ do
- let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
- not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
- terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
- defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
- skipMany nonItem
- let term = foldl1 (\x y -> x <> B.linebreak <> y) terms
- return (term, map (fixPlains True) defs)
-
-fixPlains :: Bool -> Blocks -> Blocks
-fixPlains inList bs = if any isParaish bs'
- then B.fromList $ map plainToPara bs'
- else bs
- where isParaish (Para _) = True
- isParaish (CodeBlock _ _) = True
- isParaish (Header _ _ _) = True
- isParaish (BlockQuote _) = True
- isParaish (BulletList _) = not inList
- isParaish (OrderedList _ _) = not inList
- isParaish (DefinitionList _) = not inList
- isParaish _ = False
- plainToPara (Plain xs) = Para xs
- plainToPara x = x
- bs' = B.toList bs
-
-pRawTag :: PandocMonad m => TagParser m String
-pRawTag = do
- tag <- pAnyTag
- let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
- if tagOpen ignorable (const True) tag || tagClose ignorable tag
- then return []
- else return $ renderTags' [tag]
-
-pDiv :: PandocMonad m => TagParser m Blocks
-pDiv = try $ do
- guardEnabled Ext_native_divs
- let isDivLike "div" = True
- isDivLike "section" = True
- isDivLike _ = False
- TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
- contents <- pInTags tag block
- let (ident, classes, kvs) = mkAttr attr
- let classes' = if tag == "section"
- then "section":classes
- else classes
- return $ B.divWith (ident, classes', kvs) contents
-
-pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
-pRawHtmlBlock = do
- raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
- exts <- getOption readerExtensions
- if extensionEnabled Ext_raw_html exts && not (null raw)
- then return $ B.rawBlock "html" raw
- else ignore raw
-
-ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
-ignore raw = do
- pos <- getPosition
- -- raw can be null for tags like <!DOCTYPE>; see paRawTag
- -- in this case we don't want a warning:
- unless (null raw) $
- report $ SkippedContent raw pos
- return mempty
-
-pHtmlBlock :: PandocMonad m => String -> TagParser m String
-pHtmlBlock t = try $ do
- open <- pSatisfy (~== TagOpen t [])
- contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
- return $ renderTags' $ [open] ++ contents ++ [TagClose t]
-
--- Sets chapter context
-eSection :: PandocMonad m => TagParser m Blocks
-eSection = try $ do
- let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
- let sectTag = tagOpen (`elem` sectioningContent) matchChapter
- TagOpen tag _ <- lookAhead $ pSatisfy sectTag
- setInChapter (pInTags tag block)
-
-headerLevel :: PandocMonad m => String -> TagParser m Int
-headerLevel tagtype = do
- let level = read (drop 1 tagtype)
- (try $ do
- guardEnabled Ext_epub_html_exts
- asks inChapter >>= guard
- return (level - 1))
- <|>
- return level
-
-eTitlePage :: PandocMonad m => TagParser m ()
-eTitlePage = try $ do
- let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as)
- let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")
- isTitlePage
- TagOpen tag _ <- lookAhead $ pSatisfy groupTag
- () <$ pInTags tag block
-
-pHeader :: PandocMonad m => TagParser m Blocks
-pHeader = try $ do
- TagOpen tagtype attr <- pSatisfy $
- tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
- (const True)
- let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
- level <- headerLevel tagtype
- contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
- let ident = fromMaybe "" $ lookup "id" attr
- let classes = maybe [] words $ lookup "class" attr
- let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
- attr' <- registerHeader (ident, classes, keyvals) contents
- return $ if bodyTitle
- then mempty -- skip a representation of the title in the body
- else B.headerWith attr' level contents
-
-pHrule :: PandocMonad m => TagParser m Blocks
-pHrule = do
- pSelfClosing (=="hr") (const True)
- return B.horizontalRule
-
-pTable :: PandocMonad m => TagParser m Blocks
-pTable = try $ do
- TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
- skipMany pBlank
- caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
- widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
- let pTh = option [] $ pInTags "tr" (pCell "th")
- pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
- pTBody = do pOptInTag "tbody" $ many1 pTr
- head'' <- pOptInTag "thead" pTh
- head' <- pOptInTag "tbody" $ do
- if null head''
- then pTh
- else return head''
- rowsLs <- many pTBody
- rows' <- pOptInTag "tfoot" $ many pTr
- TagClose _ <- pSatisfy (~== TagClose "table")
- let rows'' = (concat rowsLs) ++ rows'
- -- fail on empty table
- guard $ not $ null head' && null rows''
- let isSinglePlain x = case B.toList x of
- [] -> True
- [Plain _] -> True
- _ -> False
- let isSimple = all isSinglePlain $ concat (head':rows'')
- let cols = length $ if null head' then head rows'' else head'
- -- add empty cells to short rows
- let addEmpties r = case cols - length r of
- n | n > 0 -> r ++ replicate n mempty
- | otherwise -> r
- let rows = map addEmpties rows''
- let aligns = replicate cols AlignDefault
- let widths = if null widths'
- then if isSimple
- then replicate cols 0
- else replicate cols (1.0 / fromIntegral cols)
- else widths'
- return $ B.table caption (zip aligns widths) head' rows
-
-pCol :: PandocMonad m => TagParser m Double
-pCol = try $ do
- TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
- skipMany pBlank
- optional $ pSatisfy (~== TagClose "col")
- skipMany pBlank
- return $ case lookup "width" attribs of
- Nothing -> case lookup "style" attribs of
- Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
- fromMaybe 0.0 $ safeRead ('0':'.':filter
- (`notElem` " \t\r\n%'\";") xs)
- _ -> 0.0
- Just x | not (null x) && last x == '%' ->
- fromMaybe 0.0 $ safeRead ('0':'.':init x)
- _ -> 0.0
-
-pColgroup :: PandocMonad m => TagParser m [Double]
-pColgroup = try $ do
- pSatisfy (~== TagOpen "colgroup" [])
- skipMany pBlank
- manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
-
-noColOrRowSpans :: Tag String -> Bool
-noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
- where isNullOrOne x = case fromAttrib x t of
- "" -> True
- "1" -> True
- _ -> False
-
-pCell :: PandocMonad m => String -> TagParser m [Blocks]
-pCell celltype = try $ do
- skipMany pBlank
- res <- pInTags' celltype noColOrRowSpans block
- skipMany pBlank
- return [res]
-
-pBlockQuote :: PandocMonad m => TagParser m Blocks
-pBlockQuote = do
- contents <- pInTags "blockquote" block
- return $ B.blockQuote $ fixPlains False contents
-
-pPlain :: PandocMonad m => TagParser m Blocks
-pPlain = do
- contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
- if B.isNull contents
- then return mempty
- else return $ B.plain contents
-
-pPara :: PandocMonad m => TagParser m Blocks
-pPara = do
- contents <- trimInlines <$> pInTags "p" inline
- return $ B.para contents
-
-pCodeBlock :: PandocMonad m => TagParser m Blocks
-pCodeBlock = try $ do
- TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
- contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
- let rawText = concatMap tagToString contents
- -- drop leading newline if any
- let result' = case rawText of
- '\n':xs -> xs
- _ -> rawText
- -- drop trailing newline if any
- let result = case reverse result' of
- '\n':_ -> init result'
- _ -> result'
- return $ B.codeBlockWith (mkAttr attr) result
-
-tagToString :: Tag String -> String
-tagToString (TagText s) = s
-tagToString (TagOpen "br" _) = "\n"
-tagToString _ = ""
-
-inline :: PandocMonad m => TagParser m Inlines
-inline = choice
- [ eNoteref
- , eSwitch id inline
- , pTagText
- , pQ
- , pEmph
- , pStrong
- , pSuperscript
- , pSubscript
- , pStrikeout
- , pLineBreak
- , pLink
- , pImage
- , pCode
- , pSpan
- , pMath False
- , pRawHtmlInline
- ]
-
-pLocation :: PandocMonad m => TagParser m ()
-pLocation = do
- (TagPosition r c) <- pSat isTagPosition
- setPosition $ newPos "input" r c
-
-pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
-pSat f = do
- pos <- getPosition
- token show (const pos) (\x -> if f x then Just x else Nothing)
-
-pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
-pSatisfy f = try $ optional pLocation >> pSat f
-
-pAnyTag :: PandocMonad m => TagParser m (Tag String)
-pAnyTag = pSatisfy (const True)
-
-pSelfClosing :: PandocMonad m
- => (String -> Bool) -> ([Attribute String] -> Bool)
- -> TagParser m (Tag String)
-pSelfClosing f g = do
- open <- pSatisfy (tagOpen f g)
- optional $ pSatisfy (tagClose f)
- return open
-
-pQ :: PandocMonad m => TagParser m Inlines
-pQ = do
- context <- asks quoteContext
- let quoteType = case context of
- InDoubleQuote -> SingleQuote
- _ -> DoubleQuote
- let innerQuoteContext = if quoteType == SingleQuote
- then InSingleQuote
- else InDoubleQuote
- let constructor = case quoteType of
- SingleQuote -> B.singleQuoted
- DoubleQuote -> B.doubleQuoted
- withQuoteContext innerQuoteContext $
- pInlinesInTags "q" constructor
-
-pEmph :: PandocMonad m => TagParser m Inlines
-pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
-
-pStrong :: PandocMonad m => TagParser m Inlines
-pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
-
-pSuperscript :: PandocMonad m => TagParser m Inlines
-pSuperscript = pInlinesInTags "sup" B.superscript
-
-pSubscript :: PandocMonad m => TagParser m Inlines
-pSubscript = pInlinesInTags "sub" B.subscript
-
-pStrikeout :: PandocMonad m => TagParser m Inlines
-pStrikeout = do
- pInlinesInTags "s" B.strikeout <|>
- pInlinesInTags "strike" B.strikeout <|>
- pInlinesInTags "del" B.strikeout <|>
- try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
- contents <- mconcat <$> manyTill inline (pCloses "span")
- return $ B.strikeout contents)
-
-pLineBreak :: PandocMonad m => TagParser m Inlines
-pLineBreak = do
- pSelfClosing (=="br") (const True)
- return B.linebreak
-
--- Unlike fromAttrib from tagsoup, this distinguishes
--- between a missing attribute and an attribute with empty content.
-maybeFromAttrib :: String -> Tag String -> Maybe String
-maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
-maybeFromAttrib _ _ = Nothing
-
-pLink :: PandocMonad m => TagParser m Inlines
-pLink = try $ do
- tag <- pSatisfy $ tagOpenLit "a" (const True)
- let title = fromAttrib "title" tag
- -- take id from id attribute if present, otherwise name
- let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag
- let cls = words $ fromAttrib "class" tag
- lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- -- check for href; if href, then a link, otherwise a span
- case maybeFromAttrib "href" tag of
- Nothing ->
- return $ B.spanWith (uid, cls, []) lab
- Just url' -> do
- mbBaseHref <- baseHref <$> getState
- let url = case (parseURIReference url', mbBaseHref) of
- (Just rel, Just bs) ->
- show (rel `nonStrictRelativeTo` bs)
- _ -> url'
- return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
-
-pImage :: PandocMonad m => TagParser m Inlines
-pImage = do
- tag <- pSelfClosing (=="img") (isJust . lookup "src")
- mbBaseHref <- baseHref <$> getState
- let url' = fromAttrib "src" tag
- let url = case (parseURIReference url', mbBaseHref) of
- (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
- _ -> url'
- let title = fromAttrib "title" tag
- let alt = fromAttrib "alt" tag
- let uid = fromAttrib "id" tag
- let cls = words $ fromAttrib "class" tag
- let getAtt k = case fromAttrib k tag of
- "" -> []
- v -> [(k, v)]
- let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
- return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
-
-pCode :: PandocMonad m => TagParser m Inlines
-pCode = try $ do
- (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
- result <- manyTill pAnyTag (pCloses open)
- return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
-
-pSpan :: PandocMonad m => TagParser m Inlines
-pSpan = try $ do
- guardEnabled Ext_native_spans
- TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
- contents <- pInTags "span" inline
- let isSmallCaps = fontVariant == "small-caps"
- where styleAttr = fromMaybe "" $ lookup "style" attr
- fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
- let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
- return $ tag contents
-
-pRawHtmlInline :: PandocMonad m => TagParser m Inlines
-pRawHtmlInline = do
- inplain <- asks inPlain
- result <- pSatisfy (tagComment (const True))
- <|> if inplain
- then pSatisfy (not . isBlockTag)
- else pSatisfy isInlineTag
- exts <- getOption readerExtensions
- let raw = renderTags' [result]
- if extensionEnabled Ext_raw_html exts
- then return $ B.rawInline "html" raw
- else ignore raw
-
-mathMLToTeXMath :: String -> Either String String
-mathMLToTeXMath s = writeTeX <$> readMathML s
-
-pMath :: PandocMonad m => Bool -> TagParser m Inlines
-pMath inCase = try $ do
- open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
- -- we'll assume math tags are MathML unless specially marked
- -- otherwise...
- unless inCase $
- guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
- contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math"))
- case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of
- Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $
- innerText contents
- Right [] -> return mempty
- Right x -> return $ case lookup "display" attr of
- Just "block" -> B.displayMath x
- _ -> B.math x
-
-pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines)
- -> TagParser m Inlines
-pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
-
-pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a
-pInTags tagtype parser = pInTags' tagtype (const True) parser
-
-pInTags' :: (PandocMonad m, Monoid a)
- => String
- -> (Tag String -> Bool)
- -> TagParser m a
- -> TagParser m a
-pInTags' tagtype tagtest parser = try $ do
- pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
- mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-
--- parses p, preceeded by an optional opening tag
--- and followed by an optional closing tags
-pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a
-pOptInTag tagtype p = try $ do
- skipMany pBlank
- optional $ pSatisfy (~== TagOpen tagtype [])
- skipMany pBlank
- x <- p
- skipMany pBlank
- optional $ pSatisfy (~== TagClose tagtype)
- skipMany pBlank
- return x
-
-pCloses :: PandocMonad m => String -> TagParser m ()
-pCloses tagtype = try $ do
- t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
- case t of
- (TagClose t') | t' == tagtype -> pAnyTag >> return ()
- (TagOpen t' _) | t' `closes` tagtype -> return ()
- (TagClose "ul") | tagtype == "li" -> return ()
- (TagClose "ol") | tagtype == "li" -> return ()
- (TagClose "dl") | tagtype == "dd" -> return ()
- (TagClose "table") | tagtype == "td" -> return ()
- (TagClose "table") | tagtype == "tr" -> return ()
- _ -> mzero
-
-pTagText :: PandocMonad m => TagParser m Inlines
-pTagText = try $ do
- (TagText str) <- pSatisfy isTagText
- st <- getState
- qu <- ask
- parsed <- lift $ lift $
- flip runReaderT qu $ runParserT (many pTagContents) st "text" str
- case parsed of
- Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'"
- Right result -> return $ mconcat result
-
-pBlank :: PandocMonad m => TagParser m ()
-pBlank = try $ do
- (TagText str) <- pSatisfy isTagText
- guard $ all isSpace str
-
-type InlinesParser m = HTMLParser m String
-
-pTagContents :: PandocMonad m => InlinesParser m Inlines
-pTagContents =
- B.displayMath <$> mathDisplay
- <|> B.math <$> mathInline
- <|> pStr
- <|> pSpace
- <|> smartPunctuation pTagContents
- <|> pSymbol
- <|> pBad
-
-pStr :: PandocMonad m => InlinesParser m Inlines
-pStr = do
- result <- many1 $ satisfy $ \c ->
- not (isSpace c) && not (isSpecial c) && not (isBad c)
- updateLastStrPos
- return $ B.str result
-
-isSpecial :: Char -> Bool
-isSpecial '"' = True
-isSpecial '\'' = True
-isSpecial '.' = True
-isSpecial '-' = True
-isSpecial '$' = True
-isSpecial '\8216' = True
-isSpecial '\8217' = True
-isSpecial '\8220' = True
-isSpecial '\8221' = True
-isSpecial _ = False
-
-pSymbol :: PandocMonad m => InlinesParser m Inlines
-pSymbol = satisfy isSpecial >>= return . B.str . (:[])
-
-isBad :: Char -> Bool
-isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-
-pBad :: PandocMonad m => InlinesParser m Inlines
-pBad = do
- c <- satisfy isBad
- let c' = case c of
- '\128' -> '\8364'
- '\130' -> '\8218'
- '\131' -> '\402'
- '\132' -> '\8222'
- '\133' -> '\8230'
- '\134' -> '\8224'
- '\135' -> '\8225'
- '\136' -> '\710'
- '\137' -> '\8240'
- '\138' -> '\352'
- '\139' -> '\8249'
- '\140' -> '\338'
- '\142' -> '\381'
- '\145' -> '\8216'
- '\146' -> '\8217'
- '\147' -> '\8220'
- '\148' -> '\8221'
- '\149' -> '\8226'
- '\150' -> '\8211'
- '\151' -> '\8212'
- '\152' -> '\732'
- '\153' -> '\8482'
- '\154' -> '\353'
- '\155' -> '\8250'
- '\156' -> '\339'
- '\158' -> '\382'
- '\159' -> '\376'
- _ -> '?'
- return $ B.str [c']
-
-pSpace :: PandocMonad m => InlinesParser m Inlines
-pSpace = many1 (satisfy isSpace) >>= \xs ->
- if '\n' `elem` xs
- then return B.softbreak
- else return B.space
-
---
--- Constants
---
-
-eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
- "del", "ins",
- "progress", "map", "area", "noscript", "script",
- "object", "svg", "video", "source"]
-
-{-
-inlineHtmlTags :: [[Char]]
-inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
- "br", "cite", "code", "dfn", "em", "font", "i", "img",
- "input", "kbd", "label", "q", "s", "samp", "select",
- "small", "span", "strike", "strong", "sub", "sup",
- "textarea", "tt", "u", "var"]
--}
-
-blockHtmlTags :: [String]
-blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
- "blockquote", "body", "button", "canvas",
- "caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "fieldset", "figcaption", "figure",
- "footer", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "head", "header", "hgroup", "hr", "html",
- "isindex", "menu", "noframes", "ol", "output", "p", "pre",
- "section", "table", "tbody", "textarea",
- "thead", "tfoot", "ul", "dd",
- "dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr", "script", "style"]
-
--- We want to allow raw docbook in markdown documents, so we
--- include docbook block tags here too.
-blockDocBookTags :: [String]
-blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
- "orderedlist", "segmentedlist", "simplelist",
- "variablelist", "caution", "important", "note", "tip",
- "warning", "address", "literallayout", "programlisting",
- "programlistingco", "screen", "screenco", "screenshot",
- "synopsis", "example", "informalexample", "figure",
- "informalfigure", "table", "informaltable", "para",
- "simpara", "formalpara", "equation", "informalequation",
- "figure", "screenshot", "mediaobject", "qandaset",
- "procedure", "task", "cmdsynopsis", "funcsynopsis",
- "classsynopsis", "blockquote", "epigraph", "msgset",
- "sidebar", "title"]
-
-epubTags :: [String]
-epubTags = ["case", "switch", "default"]
-
-blockTags :: [String]
-blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags
-
-isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen isInlineTagName (const True) t ||
- tagClose isInlineTagName t ||
- tagComment (const True) t
- where isInlineTagName x = x `notElem` blockTags
-
-isBlockTag :: Tag String -> Bool
-isBlockTag t = tagOpen isBlockTagName (const True) t ||
- tagClose isBlockTagName t ||
- tagComment (const True) t
- where isBlockTagName ('?':_) = True
- isBlockTagName ('!':_) = True
- isBlockTagName x = x `elem` blockTags
- || x `elem` eitherBlockOrInline
-
-isTextTag :: Tag String -> Bool
-isTextTag = tagText (const True)
-
-isCommentTag :: Tag String -> Bool
-isCommentTag = tagComment (const True)
-
--- taken from HXT and extended
--- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
-closes :: String -> String -> Bool
-_ `closes` "body" = False
-_ `closes` "html" = False
-"body" `closes` "head" = True
-"a" `closes` "a" = True
-"li" `closes` "li" = True
-"th" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
-"dd" `closes` t | t `elem` ["dt", "dd"] = True
-"dt" `closes` t | t `elem` ["dt","dd"] = True
-"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
-"optgroup" `closes` "optgroup" = True
-"optgroup" `closes` "option" = True
-"option" `closes` "option" = True
--- http://www.w3.org/TR/html-markup/p.html
-x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
- "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
- "table", "ul"] = True
-"meta" `closes` "meta" = True
-"form" `closes` "form" = True
-"label" `closes` "label" = True
-"map" `closes` "map" = True
-"object" `closes` "object" = True
-_ `closes` t | t `elem` ["option","style","script","textarea","title"] = True
-t `closes` "select" | t /= "option" = True
-"thead" `closes` t | t `elem` ["colgroup"] = True
-"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True
-"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True
-t `closes` t2 |
- t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
- t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
-t1 `closes` t2 |
- t1 `elem` blockTags &&
- t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
-_ `closes` _ = False
-
---- parsers for use in markdown, textile readers
-
--- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Monad m)
- => (Tag String -> Bool)
- -> ParserT String st m String
-htmlInBalanced f = try $ do
- lookAhead (char '<')
- inp <- getInput
- let ts = canonicalizeTags $
- parseTagsOptions parseOptions{ optTagWarning = True,
- optTagPosition = True } inp
- case ts of
- (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
- guard $ f t
- guard $ not $ hasTagWarning (t : take 1 rest)
- case htmlInBalanced' tn (t:rest) of
- [] -> mzero
- xs -> case reverse xs of
- (TagClose _ : TagPosition er ec : _) -> do
- let ls = er - sr
- let cs = ec - sc
- lscontents <- unlines <$> count ls anyLine
- cscontents <- count cs anyChar
- (_,closetag) <- htmlTag (~== TagClose tn)
- return (lscontents ++ cscontents ++ closetag)
- _ -> mzero
- _ -> mzero
-
-htmlInBalanced' :: String
- -> [Tag String]
- -> [Tag String]
-htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
- where go :: Int -> [Tag String] -> Maybe [Tag String]
- go n (t@(TagOpen tn' _):rest) | tn' == tagname =
- (t :) <$> go (n + 1) rest
- go 1 (t@(TagClose tn'):_) | tn' == tagname =
- return [t]
- go n (t@(TagClose tn'):rest) | tn' == tagname =
- (t :) <$> go (n - 1) rest
- go n (t:ts') = (t :) <$> go n ts'
- go _ [] = mzero
-
-hasTagWarning :: [Tag String] -> Bool
-hasTagWarning (TagWarning _:_) = True
-hasTagWarning _ = False
-
--- | Matches a tag meeting a certain condition.
-htmlTag :: Monad m
- => (Tag String -> Bool)
- -> ParserT [Char] st m (Tag String, String)
-htmlTag f = try $ do
- lookAhead (char '<')
- inp <- getInput
- let (next : _) = canonicalizeTags $ parseTagsOptions
- parseOptions{ optTagWarning = False } inp
- guard $ f next
- let handleTag tagname = do
- -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
- -- should NOT be parsed as an HTML tag, see #2277
- guard $ not ('.' `elem` tagname)
- -- <https://example.org> should NOT be a tag either.
- -- tagsoup will parse it as TagOpen "https:" [("example.org","")]
- guard $ not (null tagname)
- guard $ last tagname /= ':'
- rendered <- manyTill anyChar (char '>')
- return (next, rendered ++ ">")
- case next of
- TagComment s
- | "<!--" `isPrefixOf` inp -> do
- count (length s + 4) anyChar
- skipMany (satisfy (/='>'))
- char '>'
- return (next, "<!--" ++ s ++ "-->")
- | otherwise -> fail "bogus comment mode, HTML5 parse error"
- TagOpen tagname _attr -> handleTag tagname
- TagClose tagname -> handleTag tagname
- _ -> mzero
-
-mkAttr :: [(String, String)] -> Attr
-mkAttr attr = (attribsId, attribsClasses, attribsKV)
- where attribsId = fromMaybe "" $ lookup "id" attr
- attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes
- attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
-
--- Strip namespace prefixes
-stripPrefixes :: [Tag String] -> [Tag String]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag String -> Tag String
-stripPrefix (TagOpen s as) =
- TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: String -> String
-stripPrefix' s =
- case span (/= ':') s of
- (_, "") -> s
- (_, (_:ts)) -> ts
-
-isSpace :: Char -> Bool
-isSpace ' ' = True
-isSpace '\t' = True
-isSpace '\n' = True
-isSpace '\r' = True
-isSpace _ = False
-
--- Instances
-
-instance HasIdentifierList HTMLState where
- extractIdentifierList = identifiers
- updateIdentifierList f s = s{ identifiers = f (identifiers s) }
-
-instance HasHeaderMap HTMLState where
- extractHeaderMap = headerMap
- updateHeaderMap f s = s{ headerMap = f (headerMap s) }
-
--- This signature should be more general
--- MonadReader HTMLLocal m => HasQuoteContext st m
-instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
- getQuoteContext = asks quoteContext
- withQuoteContext q = local (\s -> s{quoteContext = q})
-
-instance HasReaderOptions HTMLState where
- extractReaderOptions = extractReaderOptions . parserState
-
-instance HasMeta HTMLState where
- setMeta s b st = st {parserState = setMeta s b $ parserState st}
- deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
-
-instance Default HTMLLocal where
- def = HTMLLocal NoQuote False False
-
-instance HasLastStrPosition HTMLState where
- setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
- getLastStrPos = getLastStrPos . parserState
-
-
--- EPUB Specific
---
---
-sectioningContent :: [String]
-sectioningContent = ["article", "aside", "nav", "section"]
-
-
-groupingContent :: [String]
-groupingContent = ["p", "hr", "pre", "blockquote", "ol"
- , "ul", "li", "dl", "dt", "dt", "dd"
- , "figure", "figcaption", "div", "main"]
-
-
-{-
-
-types :: [(String, ([String], Int))]
-types = -- Document divisions
- map (\s -> (s, (["section", "body"], 0)))
- ["volume", "part", "chapter", "division"]
- ++ -- Document section and components
- [
- ("abstract", ([], 0))]
--}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
deleted file mode 100644
index 310a04574..000000000
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ /dev/null
@@ -1,160 +0,0 @@
-{-# LANGUAGE CPP #-}
-{- |
- Module : Text.Pandoc.Readers.Haddock
- Copyright : Copyright (C) 2013 David Lazar
- License : GNU GPL, version 2 or above
-
- Maintainer : David Lazar <lazar6@illinois.edu>,
- John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
-
-Conversion of Haddock markup to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Haddock
- ( readHaddock
- ) where
-
-import Text.Pandoc.Builder (Blocks, Inlines)
-import qualified Text.Pandoc.Builder as B
-import Data.Monoid ((<>))
-import Text.Pandoc.Shared (trim, splitBy)
-import Data.List (intersperse, stripPrefix)
-import Data.Maybe (fromMaybe)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Documentation.Haddock.Parser
-import Documentation.Haddock.Types
-import Text.Pandoc.Error
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad)
-
-
--- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
- => ReaderOptions
- -> String
- -> m Pandoc
-readHaddock opts s = case readHaddockEither opts s of
- Right result -> return result
- Left e -> throwError e
-
-readHaddockEither :: ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse
- -> Either PandocError Pandoc
-readHaddockEither _opts =
-#if MIN_VERSION_haddock_library(1,2,0)
- Right . B.doc . docHToBlocks . _doc . parseParas
-#else
- Right . B.doc . docHToBlocks . parseParas
-#endif
-
-docHToBlocks :: DocH String Identifier -> Blocks
-docHToBlocks d' =
- case d' of
- DocEmpty -> mempty
- DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
- B.headerWith (ident,[],[]) (headerLevel h)
- (docHToInlines False $ headerTitle h)
- DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
- DocString _ -> inlineFallback
- DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
- DocParagraph x -> B.para $ docHToInlines False x
- DocIdentifier _ -> inlineFallback
- DocIdentifierUnchecked _ -> inlineFallback
- DocModule s -> B.plain $ docHToInlines False $ DocModule s
- DocWarning _ -> mempty -- TODO
- DocEmphasis _ -> inlineFallback
- DocMonospaced _ -> inlineFallback
- DocBold _ -> inlineFallback
-#if MIN_VERSION_haddock_library(1,4,0)
- DocMathInline _ -> inlineFallback
- DocMathDisplay _ -> inlineFallback
-#endif
- DocHeader h -> B.header (headerLevel h)
- (docHToInlines False $ headerTitle h)
- DocUnorderedList items -> B.bulletList (map docHToBlocks items)
- DocOrderedList items -> B.orderedList (map docHToBlocks items)
- DocDefList items -> B.definitionList (map (\(d,t) ->
- (docHToInlines False d,
- [consolidatePlains $ docHToBlocks t])) items)
- DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s
- DocCodeBlock d -> B.para $ docHToInlines True d
- DocHyperlink _ -> inlineFallback
- DocPic _ -> inlineFallback
- DocAName _ -> inlineFallback
- DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
- DocExamples es -> mconcat $ map (\e ->
- makeExample ">>>" (exampleExpression e) (exampleResult e)) es
-
- where inlineFallback = B.plain $ docHToInlines False d'
- consolidatePlains = B.fromList . consolidatePlains' . B.toList
- consolidatePlains' zs@(Plain _ : _) =
- let (xs, ys) = span isPlain zs in
- Para (concatMap extractContents xs) : consolidatePlains' ys
- consolidatePlains' (x : xs) = x : consolidatePlains' xs
- consolidatePlains' [] = []
- isPlain (Plain _) = True
- isPlain _ = False
- extractContents (Plain xs) = xs
- extractContents _ = []
-
-docHToInlines :: Bool -> DocH String Identifier -> Inlines
-docHToInlines isCode d' =
- case d' of
- DocEmpty -> mempty
- DocAppend d1 d2 -> mappend (docHToInlines isCode d1)
- (docHToInlines isCode d2)
- DocString s
- | isCode -> mconcat $ intersperse B.linebreak
- $ map B.code $ splitBy (=='\n') s
- | otherwise -> B.text s
- DocParagraph _ -> mempty
- DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s
- DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s
- DocModule s -> B.codeWith ("",["haskell","module"],[]) s
- DocWarning _ -> mempty -- TODO
- DocEmphasis d -> B.emph (docHToInlines isCode d)
- DocMonospaced (DocString s) -> B.code s
- DocMonospaced d -> docHToInlines True d
- DocBold d -> B.strong (docHToInlines isCode d)
-#if MIN_VERSION_haddock_library(1,4,0)
- DocMathInline s -> B.math s
- DocMathDisplay s -> B.displayMath s
-#endif
- DocHeader _ -> mempty
- DocUnorderedList _ -> mempty
- DocOrderedList _ -> mempty
- DocDefList _ -> mempty
- DocCodeBlock _ -> mempty
- DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h)
- (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h)
- DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p)
- (maybe mempty B.text $ pictureTitle p)
- DocAName s -> B.spanWith (s,["anchor"],[]) mempty
- DocProperty _ -> mempty
- DocExamples _ -> mempty
-
--- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Blocks
-makeExample prompt expression result =
- B.para $ B.codeWith ("",["prompt"],[]) prompt
- <> B.space
- <> B.codeWith ([], ["haskell","expr"], []) (trim expression)
- <> B.linebreak
- <> (mconcat $ intersperse B.linebreak $ map coder result')
- where
- -- 1. drop trailing whitespace from the prompt, remember the prefix
- prefix = takeWhile (`elem` " \t") prompt
-
- -- 2. drop, if possible, the exact same sequence of whitespace
- -- characters from each result line
- --
- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
- -- empty line
- result' = map (substituteBlankLine . tryStripPrefix prefix) result
- where
- tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-
- substituteBlankLine "<BLANKLINE>" = ""
- substituteBlankLine line = line
- coder = B.codeWith ([], ["result"], [])
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
deleted file mode 100644
index 9f9a79535..000000000
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ /dev/null
@@ -1,1437 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of LaTeX to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.LaTeX ( readLaTeX,
- rawLaTeXInline,
- rawLaTeXBlock,
- inlineCommand,
- ) where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
- mathDisplay, mathInline)
-import Data.Char ( chr, ord, isLetter, isAlphaNum )
-import Control.Monad
-import Text.Pandoc.Builder
-import Control.Applicative ((<|>), many, optional)
-import Data.Maybe (fromMaybe, maybeToList)
-import System.FilePath (replaceExtension, takeExtension, addExtension)
-import Data.List (intercalate)
-import qualified Data.Map as M
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
-import Text.Pandoc.ImageSize (numUnit, showFl)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, report,
- readFileFromDirs)
-
--- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> m Pandoc
-readLaTeX opts ltx = do
- parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-parseLaTeX :: PandocMonad m => LP m Pandoc
-parseLaTeX = do
- bs <- blocks
- eof
- st <- getState
- let meta = stateMeta st
- let (Pandoc _ bs') = doc bs
- return $ Pandoc meta bs'
-
-type LP m = ParserT String ParserState m
-
-anyControlSeq :: PandocMonad m => LP m String
-anyControlSeq = do
- char '\\'
- next <- option '\n' anyChar
- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
-
-controlSeq :: PandocMonad m => String -> LP m String
-controlSeq name = try $ do
- char '\\'
- case name of
- "" -> mzero
- [c] | not (isLetter c) -> string [c]
- cs -> string cs <* notFollowedBy letter <* optional sp
- return name
-
-dimenarg :: PandocMonad m => LP m String
-dimenarg = try $ do
- ch <- option "" $ string "="
- num <- many1 digit
- dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- return $ ch ++ num ++ dim
-
-sp :: PandocMonad m => LP m ()
-sp = whitespace <|> endline
-
-whitespace :: PandocMonad m => LP m ()
-whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
-
-endline :: PandocMonad m => LP m ()
-endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
-
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-
-tildeEscape :: PandocMonad m => LP m Char
-tildeEscape = try $ do
- string "^^"
- c <- satisfy (\x -> x >= '\0' && x <= '\128')
- d <- if isLowerHex c
- then option "" $ count 1 (satisfy isLowerHex)
- else return ""
- if null d
- then case ord c of
- x | x >= 64 && x <= 127 -> return $ chr (x - 64)
- | otherwise -> return $ chr (x + 64)
- else return $ chr $ read ('0':'x':c:d)
-
-comment :: PandocMonad m => LP m ()
-comment = do
- char '%'
- skipMany (satisfy (/='\n'))
- optional newline
- return ()
-
-bgroup :: PandocMonad m => LP m ()
-bgroup = try $ do
- skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
- () <$ char '{'
- <|> () <$ controlSeq "bgroup"
- <|> () <$ controlSeq "begingroup"
-
-egroup :: PandocMonad m => LP m ()
-egroup = () <$ char '}'
- <|> () <$ controlSeq "egroup"
- <|> () <$ controlSeq "endgroup"
-
-grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
-grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m String
-braced = bgroup *> (concat <$> manyTill
- ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
- <|> try (string "\\}")
- <|> try (string "\\{")
- <|> try (string "\\\\")
- <|> ((\x -> "{" ++ x ++ "}") <$> braced)
- <|> count 1 anyChar
- ) egroup)
-
-bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
-
-mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
-mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
-
-mathInline :: PandocMonad m => LP m String -> LP m Inlines
-mathInline p = math <$> (try p >>= applyMacros')
-
-mathChars :: PandocMonad m => LP m String
-mathChars =
- concat <$> many (escapedChar
- <|> (snd <$> withRaw braced)
- <|> many1 (satisfy isOrdChar))
- where escapedChar = try $ do char '\\'
- c <- anyChar
- return ['\\',c]
- isOrdChar '$' = False
- isOrdChar '{' = False
- isOrdChar '}' = False
- isOrdChar '\\' = False
- isOrdChar _ = True
-
-quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
-quoted' f starter ender = do
- startchs <- starter
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then do
- ils <- many (notFollowedBy ender >> inline)
- (ender >> return (f (mconcat ils))) <|>
- (<> mconcat ils) <$>
- lit (case startchs of
- "``" -> "“"
- "`" -> "‘"
- _ -> startchs)
- else lit startchs
-
-doubleQuote :: PandocMonad m => LP m Inlines
-doubleQuote = do
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
-
-singleQuote :: PandocMonad m => LP m Inlines
-singleQuote = do
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- else str <$> many1 (oneOf "`\'‘’")
-
-inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> inlineText
- <|> inlineCommand
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (char '-' *> option (str "-")
- (char '-' *> option (str "–") (str "—" <$ char '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (string "''"))
- <|> (str "”" <$ char '”')
- <|> (str "’" <$ char '\'')
- <|> (str "’" <$ char '’')
- <|> (str "\160" <$ char '~')
- <|> mathDisplay (string "$$" *> mathChars <* string "$$")
- <|> mathInline (char '$' *> mathChars <* char '$')
- <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str . (:[]) <$> tildeEscape)
- <|> (do res <- oneOf "#&~^'`\"[]"
- pos <- getPosition
- report $ ParsingUnescaped [res] pos
- return $ str [res])
-
-inlines :: PandocMonad m => LP m Inlines
-inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
-
-inlineGroup :: PandocMonad m => LP m Inlines
-inlineGroup = do
- ils <- grouped inline
- if isNull ils
- then return mempty
- else return $ spanWith nullAttr ils
- -- we need the span so we can detitlecase bibtex entries;
- -- we need to know when something is {C}apitalized
-
-block :: PandocMonad m => LP m Blocks
-block = (mempty <$ comment)
- <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
- <|> environment
- <|> include
- <|> macro
- <|> blockCommand
- <|> paragraph
- <|> grouped block
- <|> (mempty <$ char '&') -- loose & in table environment
-
-
-blocks :: PandocMonad m => LP m Blocks
-blocks = mconcat <$> many block
-
-getRawCommand :: PandocMonad m => String -> LP m String
-getRawCommand name' = do
- rawargs <- withRaw (many (try (optional sp *> opt)) *>
- option "" (try (optional sp *> dimenarg)) *>
- many braced)
- return $ '\\' : name' ++ snd rawargs
-
-lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
-lookupListDefault d = (fromMaybe d .) . lookupList
- where
- lookupList l m = msum $ map (`M.lookup` m) l
-
-blockCommand :: PandocMonad m => LP m Blocks
-blockCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*" <* optional sp)
- let name' = name ++ star
- let raw = do
- rawcommand <- getRawCommand name'
- transformed <- applyMacros' rawcommand
- guard $ transformed /= rawcommand
- notFollowedBy $ parseFromString inlines transformed
- parseFromString blocks transformed
- lookupListDefault raw [name',name] blockCommands
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
--- eat an optional argument and one or more arguments in braces
-ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
-ignoreInlines name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawInline "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-guardRaw :: PandocMonad m => LP m ()
-guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex
-
-optargs :: PandocMonad m => LP m String
-optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
-ignoreBlocks name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
-blockCommands = M.fromList $
- [ ("par", mempty <$ skipopts)
- , ("parbox", braced >> grouped blocks)
- , ("title", mempty <$ (skipopts *>
- (grouped inline >>= addMeta "title")
- <|> (grouped block >>= addMeta "title")))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
- , ("author", mempty <$ (skipopts *> authors))
- -- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
- , ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- -- sectioning
- , ("chapter", updateState (\s -> s{ stateHasChapters = True })
- *> section nullAttr 0)
- , ("chapter*", updateState (\s -> s{ stateHasChapters = True })
- *> section ("",["unnumbered"],[]) 0)
- , ("section", section nullAttr 1)
- , ("section*", section ("",["unnumbered"],[]) 1)
- , ("subsection", section nullAttr 2)
- , ("subsection*", section ("",["unnumbered"],[]) 2)
- , ("subsubsection", section nullAttr 3)
- , ("subsubsection*", section ("",["unnumbered"],[]) 3)
- , ("paragraph", section nullAttr 4)
- , ("paragraph*", section ("",["unnumbered"],[]) 4)
- , ("subparagraph", section nullAttr 5)
- , ("subparagraph*", section ("",["unnumbered"],[]) 5)
- -- beamer slides
- , ("frametitle", section nullAttr 3)
- , ("framesubtitle", section nullAttr 4)
- -- letters
- , ("opening", (para . trimInlines) <$> (skipopts *> tok))
- , ("closing", skipopts *> closing)
- --
- , ("hrule", pure horizontalRule)
- , ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> looseItem)
- , ("documentclass", skipopts *> braced *> preamble)
- , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> setCaption)
- , ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- , ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- -- includes
- , ("lstinputlisting", inputListing)
- ] ++ map ignoreBlocks
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks
- [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
- -- newcommand, etc. should be parsed by macro, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
- , "bibliographystyle"
- , "maketitle", "makeindex", "makeglossary"
- , "addcontentsline", "addtocontents", "addtocounter"
- -- \ignore{} is used conventionally in literate haskell for definitions
- -- that are to be processed by the compiler but not printed.
- , "ignore"
- , "hyperdef"
- , "markboth", "markright", "markleft"
- , "newpage"
- ]
-
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
-addMeta field val = updateState $ \st ->
- st{ stateMeta = addMetaField field val $ stateMeta st }
-
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-
-setCaption :: PandocMonad m => LP m Blocks
-setCaption = do
- ils <- tok
- mblabel <- option Nothing $
- try $ spaces' >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("data-label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ stateCaption = Just ils' }
- return mempty
-
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
-
-authors :: PandocMonad m => LP m ()
-authors = try $ do
- char '{'
- let oneAuthor = mconcat <$>
- many1 (notFollowedBy' (controlSeq "and") >>
- (inline <|> mempty <$ blockCommand))
- -- skip e.g. \vspace{10pt}
- auths <- sepBy oneAuthor (controlSeq "and")
- char '}'
- addMeta "author" (map trimInlines auths)
-
-section :: PandocMonad m => Attr -> Int -> LP m Blocks
-section (ident, classes, kvs) lvl = do
- hasChapters <- stateHasChapters `fmap` getState
- let lvl' = if hasChapters then lvl + 1 else lvl
- skipopts
- contents <- grouped inline
- lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
- attr' <- registerHeader (lab, classes, kvs) contents
- return $ headerWith attr' lvl' contents
-
-inlineCommand :: PandocMonad m => LP m Inlines
-inlineCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- guard $ not $ isBlockCommand name
- exts <- getOption readerExtensions
- star <- option "" (string "*")
- let name' = name ++ star
- let raw = do
- rawargs <- withRaw
- (skipangles *> skipopts *> option "" dimenarg *> many braced)
- let rawcommand = '\\' : name ++ star ++ snd rawargs
- transformed <- applyMacros' rawcommand
- if transformed /= rawcommand
- then parseFromString inlines transformed
- else if extensionEnabled Ext_raw_tex exts
- then return $ rawInline "latex" rawcommand
- else ignore rawcommand
- (lookupListDefault mzero [name',name] inlineCommands <*
- optional (try (string "{}")))
- <|> raw
-
-unlessParseRaw :: PandocMonad m => LP m ()
-unlessParseRaw = getOption readerExtensions >>=
- guard . not . extensionEnabled Ext_raw_tex
-
-isBlockCommand :: String -> Bool
-isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
-
-
-inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
-inlineEnvironments = M.fromList
- [ ("displaymath", mathEnv id Nothing "displaymath")
- , ("math", math <$> verbEnv "math")
- , ("equation", mathEnv id Nothing "equation")
- , ("equation*", mathEnv id Nothing "equation*")
- , ("gather", mathEnv id (Just "gathered") "gather")
- , ("gather*", mathEnv id (Just "gathered") "gather*")
- , ("multline", mathEnv id (Just "gathered") "multline")
- , ("multline*", mathEnv id (Just "gathered") "multline*")
- , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
- , ("align", mathEnv id (Just "aligned") "align")
- , ("align*", mathEnv id (Just "aligned") "align*")
- , ("alignat", mathEnv id (Just "aligned") "alignat")
- , ("alignat*", mathEnv id (Just "aligned") "alignat*")
- ]
-
-inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
-inlineCommands = M.fromList $
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("ldots", lit "…")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("label", unlessParseRaw >> (inBrackets <$> tok))
- , ("ref", unlessParseRaw >> (inBrackets <$> tok))
- , ("noindent", unlessParseRaw >> ignore "noindent")
- , ("textgreek", tok)
- , ("sep", lit ",")
- , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
- , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
- , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", try $ tok >>= accent hungarumlaut)
- , ("`", option (str "`") $ try $ tok >>= accent grave)
- , ("'", option (str "'") $ try $ tok >>= accent acute)
- , ("^", option (str "^") $ try $ tok >>= accent circ)
- , ("~", option (str "~") $ try $ tok >>= accent tilde)
- , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
- , (".", option (str ".") $ try $ tok >>= accent dot)
- , ("=", option (str "=") $ try $ tok >>= accent macron)
- , ("c", option (str "c") $ try $ tok >>= accent cedilla)
- , ("v", option (str "v") $ try $ tok >>= accent hacek)
- , ("u", option (str "u") $ try $ tok >>= accent breve)
- , ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
- , (",", pure mempty)
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
- , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
- , ("verb", doverb)
- , ("lstinline", skipopts *> doverb)
- , ("Verb", doverb)
- , ("texttt", (code . stringify . toList) <$> tok)
- , ("url", (unescapeURL <$> braced) >>= \url ->
- pure (link url "" (str url)))
- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
- tok >>= \lab ->
- pure (link url "" lab))
- , ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . removeDoubleQuotes <$> braced
- mkImage options src)
- , ("enquote", enquote)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- ] ++ map ignoreInlines
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks:
- [ "index" ]
-
-mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
-mkImage options src = do
- let replaceTextwidth (k,v) = case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
- _ -> (k, v)
- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
- let attr = ("",[], kvs)
- let alt = str "image"
- case takeExtension src of
- "" -> do
- defaultExt <- getOption readerDefaultImageExtension
- return $ imageWith attr (addExtension src defaultExt) "" alt
- _ -> return $ imageWith attr src "" alt
-
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
-
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
-
-enquote :: PandocMonad m => LP m Inlines
-enquote = do
- skipopts
- context <- stateQuoteContext <$> getState
- if context == InDoubleQuote
- then singleQuoted <$> withQuoteContext InSingleQuote tok
- else doubleQuoted <$> withQuoteContext InDoubleQuote tok
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
- marker <- anyChar
- code <$> manyTill (satisfy (/='\n')) (char marker)
-
-doLHSverb :: PandocMonad m => LP m Inlines
-doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
-
-lit :: String -> LP m Inlines
-lit = pure . str
-
-accent :: (Char -> String) -> Inlines -> LP m Inlines
-accent f ils =
- case toList ils of
- (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
- [] -> mzero
- _ -> return ils
-
-grave :: Char -> String
-grave 'A' = "À"
-grave 'E' = "È"
-grave 'I' = "Ì"
-grave 'O' = "Ò"
-grave 'U' = "Ù"
-grave 'a' = "à"
-grave 'e' = "è"
-grave 'i' = "ì"
-grave 'o' = "ò"
-grave 'u' = "ù"
-grave c = [c]
-
-acute :: Char -> String
-acute 'A' = "Á"
-acute 'E' = "É"
-acute 'I' = "Í"
-acute 'O' = "Ó"
-acute 'U' = "Ú"
-acute 'Y' = "Ý"
-acute 'a' = "á"
-acute 'e' = "é"
-acute 'i' = "í"
-acute 'o' = "ó"
-acute 'u' = "ú"
-acute 'y' = "ý"
-acute 'C' = "Ć"
-acute 'c' = "ć"
-acute 'L' = "Ĺ"
-acute 'l' = "ĺ"
-acute 'N' = "Ń"
-acute 'n' = "ń"
-acute 'R' = "Ŕ"
-acute 'r' = "ŕ"
-acute 'S' = "Ś"
-acute 's' = "ś"
-acute 'Z' = "Ź"
-acute 'z' = "ź"
-acute c = [c]
-
-circ :: Char -> String
-circ 'A' = "Â"
-circ 'E' = "Ê"
-circ 'I' = "Î"
-circ 'O' = "Ô"
-circ 'U' = "Û"
-circ 'a' = "â"
-circ 'e' = "ê"
-circ 'i' = "î"
-circ 'o' = "ô"
-circ 'u' = "û"
-circ 'C' = "Ĉ"
-circ 'c' = "ĉ"
-circ 'G' = "Ĝ"
-circ 'g' = "ĝ"
-circ 'H' = "Ĥ"
-circ 'h' = "ĥ"
-circ 'J' = "Ĵ"
-circ 'j' = "ĵ"
-circ 'S' = "Ŝ"
-circ 's' = "ŝ"
-circ 'W' = "Ŵ"
-circ 'w' = "ŵ"
-circ 'Y' = "Ŷ"
-circ 'y' = "ŷ"
-circ c = [c]
-
-tilde :: Char -> String
-tilde 'A' = "Ã"
-tilde 'a' = "ã"
-tilde 'O' = "Õ"
-tilde 'o' = "õ"
-tilde 'I' = "Ĩ"
-tilde 'i' = "ĩ"
-tilde 'U' = "Ũ"
-tilde 'u' = "ũ"
-tilde 'N' = "Ñ"
-tilde 'n' = "ñ"
-tilde c = [c]
-
-umlaut :: Char -> String
-umlaut 'A' = "Ä"
-umlaut 'E' = "Ë"
-umlaut 'I' = "Ï"
-umlaut 'O' = "Ö"
-umlaut 'U' = "Ü"
-umlaut 'a' = "ä"
-umlaut 'e' = "ë"
-umlaut 'i' = "ï"
-umlaut 'o' = "ö"
-umlaut 'u' = "ü"
-umlaut c = [c]
-
-hungarumlaut :: Char -> String
-hungarumlaut 'A' = "A̋"
-hungarumlaut 'E' = "E̋"
-hungarumlaut 'I' = "I̋"
-hungarumlaut 'O' = "Ő"
-hungarumlaut 'U' = "Ű"
-hungarumlaut 'Y' = "ӳ"
-hungarumlaut 'a' = "a̋"
-hungarumlaut 'e' = "e̋"
-hungarumlaut 'i' = "i̋"
-hungarumlaut 'o' = "ő"
-hungarumlaut 'u' = "ű"
-hungarumlaut 'y' = "ӳ"
-hungarumlaut c = [c]
-
-dot :: Char -> String
-dot 'C' = "Ċ"
-dot 'c' = "ċ"
-dot 'E' = "Ė"
-dot 'e' = "ė"
-dot 'G' = "Ġ"
-dot 'g' = "ġ"
-dot 'I' = "İ"
-dot 'Z' = "Ż"
-dot 'z' = "ż"
-dot c = [c]
-
-macron :: Char -> String
-macron 'A' = "Ā"
-macron 'E' = "Ē"
-macron 'I' = "Ī"
-macron 'O' = "Ō"
-macron 'U' = "Ū"
-macron 'a' = "ā"
-macron 'e' = "ē"
-macron 'i' = "ī"
-macron 'o' = "ō"
-macron 'u' = "ū"
-macron c = [c]
-
-cedilla :: Char -> String
-cedilla 'c' = "ç"
-cedilla 'C' = "Ç"
-cedilla 's' = "ş"
-cedilla 'S' = "Ş"
-cedilla 't' = "ţ"
-cedilla 'T' = "Ţ"
-cedilla 'e' = "ȩ"
-cedilla 'E' = "Ȩ"
-cedilla 'h' = "ḩ"
-cedilla 'H' = "Ḩ"
-cedilla 'o' = "o̧"
-cedilla 'O' = "O̧"
-cedilla c = [c]
-
-hacek :: Char -> String
-hacek 'A' = "Ǎ"
-hacek 'a' = "ǎ"
-hacek 'C' = "Č"
-hacek 'c' = "č"
-hacek 'D' = "Ď"
-hacek 'd' = "ď"
-hacek 'E' = "Ě"
-hacek 'e' = "ě"
-hacek 'G' = "Ǧ"
-hacek 'g' = "ǧ"
-hacek 'H' = "Ȟ"
-hacek 'h' = "ȟ"
-hacek 'I' = "Ǐ"
-hacek 'i' = "ǐ"
-hacek 'j' = "ǰ"
-hacek 'K' = "Ǩ"
-hacek 'k' = "ǩ"
-hacek 'L' = "Ľ"
-hacek 'l' = "ľ"
-hacek 'N' = "Ň"
-hacek 'n' = "ň"
-hacek 'O' = "Ǒ"
-hacek 'o' = "ǒ"
-hacek 'R' = "Ř"
-hacek 'r' = "ř"
-hacek 'S' = "Š"
-hacek 's' = "š"
-hacek 'T' = "Ť"
-hacek 't' = "ť"
-hacek 'U' = "Ǔ"
-hacek 'u' = "ǔ"
-hacek 'Z' = "Ž"
-hacek 'z' = "ž"
-hacek c = [c]
-
-breve :: Char -> String
-breve 'A' = "Ă"
-breve 'a' = "ă"
-breve 'E' = "Ĕ"
-breve 'e' = "ĕ"
-breve 'G' = "Ğ"
-breve 'g' = "ğ"
-breve 'I' = "Ĭ"
-breve 'i' = "ĭ"
-breve 'O' = "Ŏ"
-breve 'o' = "ŏ"
-breve 'U' = "Ŭ"
-breve 'u' = "ŭ"
-breve c = [c]
-
-tok :: PandocMonad m => LP m Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
-
-opt :: PandocMonad m => LP m Inlines
-opt = bracketed inline
-
-rawopt :: PandocMonad m => LP m String
-rawopt = do
- contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
- try (string "\\[") <|> rawopt)
- optional sp
- return $ "[" ++ contents ++ "]"
-
-skipopts :: PandocMonad m => LP m ()
-skipopts = skipMany rawopt
-
--- opts in angle brackets are used in beamer
-rawangle :: PandocMonad m => LP m ()
-rawangle = try $ do
- char '<'
- skipMany (noneOf ">")
- char '>'
- return ()
-
-skipangles :: PandocMonad m => LP m ()
-skipangles = skipMany rawangle
-
-inlineText :: PandocMonad m => LP m Inlines
-inlineText = str <$> many1 inlineChar
-
-inlineChar :: PandocMonad m => LP m Char
-inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
-
-environment :: PandocMonad m => LP m Blocks
-environment = do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name environments
- <|> rawEnv name
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name inlineEnvironments
-
-rawEnv :: PandocMonad m => String -> LP m Blocks
-rawEnv name = do
- exts <- getOption readerExtensions
- let parseRaw = extensionEnabled Ext_raw_tex exts
- rawOptions <- mconcat <$> many rawopt
- let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
- unless parseRaw $ do
- pos1 <- getPosition
- report $ SkippedContent beginCommand pos1
- (bs, raw) <- withRaw $ env name blocks
- raw' <- applyMacros' raw
- if parseRaw
- then return $ rawBlock "latex" $ beginCommand ++ raw'
- else do
- pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
- return bs
-
-----
-
-braced' :: PandocMonad m => LP m String
-braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
-
-maybeAddExtension :: String -> FilePath -> FilePath
-maybeAddExtension ext fp =
- if null (takeExtension fp)
- then addExtension fp ext
- else fp
-
-include :: PandocMonad m => LP m Blocks
-include = do
- fs' <- try $ do
- char '\\'
- name <- try (string "include")
- <|> try (string "input")
- <|> string "usepackage"
- -- skip options
- skipMany $ try $ char '[' *> manyTill anyChar (char ']')
- fs <- (map trim . splitBy (==',')) <$> braced'
- return $ if name == "usepackage"
- then map (maybeAddExtension ".sty") fs
- else map (maybeAddExtension ".tex") fs
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mconcat <$> mapM (insertIncludedFile blocks dirs) fs'
-
-inputListing :: PandocMonad m => LP m Blocks
-inputListing = do
- pos <- getPosition
- options <- option [] keyvals
- f <- filter (/='"') <$> braced
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs f
- codeLines <- case mbCode of
- Just s -> return $ lines s
- Nothing -> do
- report $ CouldNotLoadIncludeFile f pos
- return []
- let (ident,classes,kvs) = parseListingsOptions options
- let language = case lookup "language" options >>= fromListingsLanguage of
- Just l -> [l]
- Nothing -> take 1 $ languagesByExtension (takeExtension f)
- let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
- let lastline = fromMaybe (length codeLines) $
- lookup "lastline" options >>= safeRead
- let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
- drop (firstline - 1) codeLines
- return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
-
-parseListingsOptions :: [(String, String)] -> Attr
-parseListingsOptions options =
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- ++ maybeToList (lookup "language" options
- >>= fromListingsLanguage)
- in (fromMaybe "" (lookup "label" options), classes, kvs)
-
-----
-
-keyval :: PandocMonad m => LP m (String, String)
-keyval = try $ do
- key <- many1 alphaNum
- val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
- skipMany spaceChar
- optional (char ',')
- skipMany spaceChar
- return (key, val)
-
-
-keyvals :: PandocMonad m => LP m [(String, String)]
-keyvals = try $ char '[' *> manyTill keyval (char ']')
-
-alltt :: PandocMonad m => String -> LP m Blocks
-alltt t = walk strToCode <$> parseFromString blocks
- (substitute " " "\\ " $ substitute "%" "\\%" $
- intercalate "\\\\\n" $ lines t)
- where strToCode (Str s) = Code nullAttr s
- strToCode x = x
-
-rawLaTeXBlock :: PandocMonad m => LP m String
-rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-
-rawLaTeXInline :: PandocMonad m => LP m Inline
-rawLaTeXInline = do
- raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
- RawInline "latex" <$> applyMacros' raw
-
-addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
-addImageCaption = walkM go
- where go (Image attr alt (src,tit)) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:")
- Nothing -> Image attr alt (src,tit)
- go x = return x
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table c als ws hs rs) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Table (toList ils) als ws hs rs
- Nothing -> Table c als ws hs rs
- go x = return x
-
-environments :: PandocMonad m => M.Map String (LP m Blocks)
-environments = M.fromList
- [ ("document", env "document" blocks <* skipMany anyChar)
- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
- , ("letter", env "letter" letterContents)
- , ("minipage", env "minipage" $
- skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- , ("figure", env "figure" $
- resetCaption *> skipopts *> blocks >>= addImageCaption)
- , ("center", env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable False >>= addTableCaption)
- , ("table", env "table" $
- resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular" $ simpTable True)
- , ("tabular", env "tabular" $ simpTable False)
- , ("quote", blockQuote <$> env "quote" blocks)
- , ("quotation", blockQuote <$> env "quotation" blocks)
- , ("verse", blockQuote <$> env "verse" blocks)
- , ("itemize", bulletList <$> listenv "itemize" (many item))
- , ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", orderedList')
- , ("alltt", alltt =<< verbEnv "alltt")
- , ("code", guardEnabled Ext_literate_haskell *>
- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- verbEnv "code"))
- , ("comment", mempty <$ verbEnv "comment")
- , ("verbatim", codeBlock <$> verbEnv "verbatim")
- , ("Verbatim", fancyverbEnv "Verbatim")
- , ("BVerbatim", fancyverbEnv "BVerbatim")
- , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
- codeBlockWith attr <$> verbEnv "lstlisting")
- , ("minted", do options <- option [] keyvals
- lang <- grouped (many1 $ satisfy (/='}'))
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
- [ "numberLines" |
- lookup "linenos" options == Just "true" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv "minted")
- , ("obeylines", parseFromString
- (para . trimInlines . mconcat <$> many inline) =<<
- intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnv para Nothing "displaymath")
- , ("equation", mathEnv para Nothing "equation")
- , ("equation*", mathEnv para Nothing "equation*")
- , ("gather", mathEnv para (Just "gathered") "gather")
- , ("gather*", mathEnv para (Just "gathered") "gather*")
- , ("multline", mathEnv para (Just "gathered") "multline")
- , ("multline*", mathEnv para (Just "gathered") "multline*")
- , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
- , ("align", mathEnv para (Just "aligned") "align")
- , ("align*", mathEnv para (Just "aligned") "align*")
- , ("alignat", mathEnv para (Just "aligned") "alignat")
- , ("alignat*", mathEnv para (Just "aligned") "alignat*")
- ]
-
-letterContents :: PandocMonad m => LP m Blocks
-letterContents = do
- bs <- blocks
- st <- getState
- -- add signature (author) and address (title)
- let addr = case lookupMeta "address" (stateMeta st) of
- Just (MetaBlocks [Plain xs]) ->
- para $ trimInlines $ fromList xs
- _ -> mempty
- return $ addr <> bs -- sig added by \closing
-
-closing :: PandocMonad m => LP m Blocks
-closing = do
- contents <- tok
- st <- getState
- let extractInlines (MetaBlocks [Plain ys]) = ys
- extractInlines (MetaBlocks [Para ys ]) = ys
- extractInlines _ = []
- let sigs = case lookupMeta "author" (stateMeta st) of
- Just (MetaList xs) ->
- para $ trimInlines $ fromList $
- intercalate [LineBreak] $ map extractInlines xs
- _ -> mempty
- return $ para (trimInlines contents) <> sigs
-
-item :: PandocMonad m => LP m Blocks
-item = blocks *> controlSeq "item" *> skipopts *> blocks
-
-looseItem :: PandocMonad m => LP m Blocks
-looseItem = do
- ctx <- stateParserContext `fmap` getState
- if ctx == ListItemState
- then mzero
- else return mempty
-
-descItem :: PandocMonad m => LP m (Inlines, [Blocks])
-descItem = do
- blocks -- skip blocks before item
- controlSeq "item"
- optional sp
- ils <- opt
- bs <- blocks
- return (ils, [bs])
-
-env :: PandocMonad m => String -> LP m a -> LP m a
-env name p = p <*
- (try (controlSeq "end" *> braced >>= guard . (== name))
- <?> ("\\end{" ++ name ++ "}"))
-
-listenv :: PandocMonad m => String -> LP m a -> LP m a
-listenv name p = try $ do
- oldCtx <- stateParserContext `fmap` getState
- updateState $ \st -> st{ stateParserContext = ListItemState }
- res <- env name p
- updateState $ \st -> st{ stateParserContext = oldCtx }
- return res
-
-mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a
-mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
- "\\end{" ++ y ++ "}"
-
-verbEnv :: PandocMonad m => String -> LP m String
-verbEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- res <- manyTill anyChar endEnv
- return $ stripTrailingNewlines res
-
-fancyverbEnv :: PandocMonad m => String -> LP m Blocks
-fancyverbEnv name = do
- options <- option [] keyvals
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv name
-
-orderedList' :: PandocMonad m => LP m Blocks
-orderedList' = do
- optional sp
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ char '[' *> anyOrderedListMarker <* char ']'
- spaces
- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
- spaces
- start <- option 1 $ try $ do controlSeq "setcounter"
- grouped (string "enum" *> many1 (oneOf "iv"))
- optional sp
- num <- grouped (many1 digit)
- spaces
- return (read num + 1 :: Int)
- bs <- listenv "enumerate" (many item)
- return $ orderedListWith (start, style, delim) bs
-
-paragraph :: PandocMonad m => LP m Blocks
-paragraph = do
- x <- trimInlines . mconcat <$> many1 inline
- if x == mempty
- then return mempty
- else return $ para x
-
-preamble :: PandocMonad m => LP m Blocks
-preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = void comment
- <|> void sp
- <|> void blanklines
- <|> void include
- <|> void macro
- <|> void blockCommand
- <|> void anyControlSeq
- <|> void braced
- <|> void anyChar
-
--------
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- char '{'
- optional sp
- keys <- manyTill citationLabel (char '}')
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m String
-citationLabel = optional sp *>
- (many1 (satisfy isBibtexKeyChar)
- <* optional sp
- <* optional (char ',')
- <* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then many1 simpleCiteArgs
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
-
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
- let ils = (toList . trimInlines . mconcat) <$>
- many (notFollowedBy (oneOf "\\};") >> inline)
- let parseOne = try $ do
- skipSpaces
- pref <- ils
- cit' <- inline -- expect a citation
- let citlist = toList cit'
- cits' <- case citlist of
- [Cite cs _] -> return cs
- _ -> mzero
- suff <- ils
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff cits'
- (c:cits, raw) <- withRaw $ grouped parseOne
- return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ raw)
-
--- tables
-
-parseAligns :: PandocMonad m => LP m [(String, Alignment, String)]
-parseAligns = try $ do
- char '{'
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
- maybeBar
- let cAlign = AlignCenter <$ char 'c'
- let lAlign = AlignLeft <$ char 'l'
- let rAlign = AlignRight <$ char 'r'
- let parAlign = AlignLeft <$ (char 'p' >> braced)
- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
- let alignPrefix = char '>' >> braced
- let alignSuffix = char '<' >> braced
- let alignSpec = do
- spaces
- pref <- option "" alignPrefix
- spaces
- ch <- alignChar
- spaces
- suff <- option "" alignSuffix
- return (pref, ch, suff)
- aligns' <- sepEndBy alignSpec maybeBar
- spaces
- char '}'
- spaces
- return $ aligns'
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces'
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces'
- optional $ bracketed (many1 (satisfy (/=']')))
- return ()
-
-lbreak :: PandocMonad m => LP m ()
-lbreak = () <$ try (spaces' *>
- (controlSeq "\\" <|> controlSeq "tabularnewline") <*
- spaces')
-
-amp :: PandocMonad m => LP m ()
-amp = () <$ try (spaces' *> char '&' <* spaces')
-
-parseTableRow :: PandocMonad m
- => Int -- ^ number of columns
- -> [String] -- ^ prefixes
- -> [String] -- ^ suffixes
- -> LP m [Blocks]
-parseTableRow cols prefixes suffixes = try $ do
- let tableCellRaw = many (notFollowedBy
- (amp <|> lbreak <|>
- (() <$ try (string "\\end"))) >> anyChar)
- let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
- env "minipage"
- (skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- let tableCell = minipage <|>
- ((plain . trimInlines . mconcat) <$> many inline)
- rawcells <- sepBy1 tableCellRaw amp
- guard $ length rawcells == cols
- let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s)
- rawcells prefixes suffixes
- cells' <- mapM (parseFromString tableCell) rawcells'
- let numcells = length cells'
- guard $ numcells <= cols && numcells >= 1
- guard $ cells' /= [mempty]
- -- note: a & b in a three-column table leaves an empty 3rd cell:
- let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces'
- return cells''
-
-spaces' :: PandocMonad m => LP m ()
-spaces' = spaces *> skipMany (comment *> spaces)
-
-simpTable :: PandocMonad m => Bool -> LP m Blocks
-simpTable hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces' >> tok)
- skipopts
- (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns
- let cols = length aligns
- optional $ controlSeq "caption" *> skipopts *> setCaption
- optional lbreak
- spaces'
- skipMany hline
- spaces'
- header' <- option [] $ try (parseTableRow cols prefixes suffixes <*
- lbreak <* many1 hline)
- spaces'
- rows <- sepEndBy (parseTableRow cols prefixes suffixes)
- (lbreak <* optional (skipMany hline))
- spaces'
- optional $ controlSeq "caption" *> skipopts *> setCaption
- optional lbreak
- spaces'
- let header'' = if null header'
- then replicate cols mempty
- else header'
- lookAhead $ controlSeq "end" -- make sure we're at end
- return $ table mempty (zip aligns (repeat 0)) header'' rows
-
-removeDoubleQuotes :: String -> String
-removeDoubleQuotes ('"':xs) =
- case reverse xs of
- '"':ys -> reverse ys
- _ -> '"':xs
-removeDoubleQuotes xs = xs
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
deleted file mode 100644
index 80a1cd7a2..000000000
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ /dev/null
@@ -1,2119 +0,0 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
-{-# LANGUAGE ScopedTypeVariables #-}
-
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of markdown-formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-
-import Data.List ( transpose, sortBy, findIndex, intercalate )
-import qualified Data.Map as M
-import Data.Scientific (coefficient, base10Exponent)
-import Data.Ord ( comparing )
-import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
-import Data.Maybe
-import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
-import Text.Pandoc.Generic (bottomUp)
-import qualified Data.Text as T
-import Data.Text (Text)
-import qualified Data.Yaml as Yaml
-import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..))
-import qualified Data.HashMap.Strict as H
-import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Data.Vector as V
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Shared
-import Text.Pandoc.Pretty (charWidth)
-import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
- isTextTag, isCommentTag )
-import Control.Monad
-import System.FilePath (takeExtension, addExtension)
-import Text.HTML.TagSoup
-import Data.Monoid ((<>))
-import Control.Monad.Trans (lift)
-import Control.Monad.Except (throwError, catchError)
-import Text.Pandoc.Class (PandocMonad, report)
-
-type MarkdownParser m = ParserT [Char] ParserState m
-
--- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readMarkdown opts s = do
- parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
---
--- Constants and data structure definitions
---
-
-isBulletListMarker :: Char -> Bool
-isBulletListMarker '*' = True
-isBulletListMarker '+' = True
-isBulletListMarker '-' = True
-isBulletListMarker _ = False
-
-isHruleChar :: Char -> Bool
-isHruleChar '*' = True
-isHruleChar '-' = True
-isHruleChar '_' = True
-isHruleChar _ = False
-
-setextHChars :: String
-setextHChars = "=-"
-
-isBlank :: Char -> Bool
-isBlank ' ' = True
-isBlank '\t' = True
-isBlank '\n' = True
-isBlank _ = False
-
---
--- auxiliary functions
---
-
--- | Succeeds when we're in list context.
-inList :: PandocMonad m => MarkdownParser m ()
-inList = do
- ctx <- stateParserContext <$> getState
- guard (ctx == ListItemState)
-
-spnl :: PandocMonad m => ParserT [Char] st m ()
-spnl = try $ do
- skipSpaces
- optional newline
- skipSpaces
- notFollowedBy (char '\n')
-
-indentSpaces :: PandocMonad m => MarkdownParser m String
-indentSpaces = try $ do
- tabStop <- getOption readerTabStop
- count tabStop (char ' ') <|>
- string "\t" <?> "indentation"
-
-nonindentSpaces :: PandocMonad m => MarkdownParser m String
-nonindentSpaces = do
- tabStop <- getOption readerTabStop
- sps <- many (char ' ')
- if length sps < tabStop
- then return sps
- else unexpected "indented line"
-
--- returns number of spaces parsed
-skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
-skipNonindentSpaces = do
- tabStop <- getOption readerTabStop
- atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ')
-
-atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int
-atMostSpaces n
- | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
- | otherwise = return 0
-
-litChar :: PandocMonad m => MarkdownParser m Char
-litChar = escapedChar'
- <|> characterReference
- <|> noneOf "\n"
- <|> try (newline >> notFollowedBy blankline >> return ' ')
-
--- | Parse a sequence of inline elements between square brackets,
--- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
-inlinesInBalancedBrackets = do
- char '['
- (_, raw) <- withRaw $ charsInBalancedBrackets 1
- guard $ not $ null raw
- parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
-
-charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m ()
-charsInBalancedBrackets 0 = return ()
-charsInBalancedBrackets openBrackets =
- (char '[' >> charsInBalancedBrackets (openBrackets + 1))
- <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
- <|> (( (() <$ code)
- <|> (() <$ (escapedChar'))
- <|> (newline >> notFollowedBy blankline)
- <|> skipMany1 (noneOf "[]`\n\\")
- <|> (() <$ count 1 (oneOf "`\\"))
- ) >> charsInBalancedBrackets openBrackets)
-
---
--- document structure
---
-
-rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
-rawTitleBlockLine = do
- char '%'
- skipSpaces
- first <- anyLine
- rest <- many $ try $ do spaceChar
- notFollowedBy blankline
- skipSpaces
- anyLine
- return $ trim $ unlines (first:rest)
-
-titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
-titleLine = try $ do
- raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
-
-authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
-authorsLine = try $ do
- raw <- rawTitleBlockLine
- let sep = (char ';' <* spaces) <|> newline
- let pAuthors = sepEndBy
- (trimInlinesF . mconcat <$> many
- (try $ notFollowedBy sep >> inline))
- sep
- sequence <$> parseFromString pAuthors raw
-
-dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
-dateLine = try $ do
- raw <- rawTitleBlockLine
- res <- parseFromString (many inline) raw
- return $ trimInlinesF $ mconcat res
-
-titleBlock :: PandocMonad m => MarkdownParser m ()
-titleBlock = pandocTitleBlock <|> mmdTitleBlock
-
-pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
- guardEnabled Ext_pandoc_title_block
- lookAhead (char '%')
- title <- option mempty titleLine
- author <- option (return []) authorsLine
- date <- option mempty dateLine
- optional blanklines
- let meta' = do title' <- title
- author' <- author
- date' <- date
- return $
- (if B.isNull title' then id else B.setMeta "title" title')
- . (if null author' then id else B.setMeta "author" author')
- . (if B.isNull date' then id else B.setMeta "date" date')
- $ nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-
--- Adapted from solution at
--- http://stackoverflow.com/a/29448764/1901888
-foldrWithKeyM :: Monad m => (k -> b -> a -> m a) -> a -> H.HashMap k b -> m a
-foldrWithKeyM f acc = H.foldrWithKey f' (return acc)
- where
- f' k b ma = ma >>= \a -> f k b a
-
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
- guardEnabled Ext_yaml_metadata_block
- pos <- getPosition
- string "---"
- blankline
- notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
- rawYamlLines <- manyTill anyLine stopLine
- -- by including --- and ..., we allow yaml blocks with just comments:
- let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
- optional blanklines
- opts <- stateOptions <$> getState
- meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) ->
- foldrWithKeyM
- (\k v m -> do
- if ignorable k
- then return m
- else (do v' <- lift $ yamlToMeta opts v
- return $ B.setMeta (T.unpack k) v' m)
- `catchError`
- (\_ -> return m)
- ) nullMeta hashmap
- Right Yaml.Null -> return nullMeta
- Right _ -> do
- logMessage $
- CouldNotParseYamlMetadata "not an object"
- pos
- return nullMeta
- Left err' -> do
- case err' of
- InvalidYaml (Just YamlParseException{
- yamlProblem = problem
- , yamlContext = _ctxt
- , yamlProblemMark = Yaml.YamlMark {
- yamlLine = yline
- , yamlColumn = ycol
- }}) ->
- logMessage $ CouldNotParseYamlMetadata
- problem (setSourceLine
- (setSourceColumn pos
- (sourceColumn pos + ycol))
- (sourceLine pos + 1 + yline))
- _ -> logMessage $ CouldNotParseYamlMetadata
- (show err') pos
- return nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> (return meta') }
- return mempty
-
--- ignore fields ending with _
-ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
-
-toMetaValue :: PandocMonad m => ReaderOptions -> Text -> m MetaValue
-toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
- where
- toMeta p =
- case p of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
- | endsWithNewline x -> MetaBlocks [Para xs]
- | otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
- opts' = opts{readerExtensions =
- disableExtension Ext_pandoc_title_block $
- disableExtension Ext_mmd_title_block $
- disableExtension Ext_yaml_metadata_block $
- readerExtensions opts }
-
-yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue
-yamlToMeta opts (Yaml.String t) = toMetaValue opts t
-yamlToMeta _ (Yaml.Number n)
- -- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = return $ MetaString $ show
- $ coefficient n * (10 ^ base10Exponent n)
- | otherwise = return $ MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
- (V.toList xs)
-yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
- if ignorable k
- then m
- else (do
- v' <- yamlToMeta opts v
- m' <- m
- return (M.insert (T.unpack k) v' m')))
- (return M.empty) o
-yamlToMeta _ _ = return $ MetaString ""
-
-stopLine :: PandocMonad m => MarkdownParser m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
-mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
- guardEnabled Ext_mmd_title_block
- firstPair <- kvPair False
- restPairs <- many (kvPair True)
- let kvPairs = firstPair : restPairs
- blanklines
- updateState $ \st -> st{ stateMeta' = stateMeta' st <>
- return (Meta $ M.fromList kvPairs) }
-
-kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
-kvPair allowEmpty = try $ do
- key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
- val <- trim <$> manyTill anyChar
- (try $ newline >> lookAhead (blankline <|> nonspaceChar))
- guard $ allowEmpty || not (null val)
- let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
- return (key',val')
-
-parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
-parseMarkdown = do
- optional titleBlock
- blocks <- parseBlocks
- st <- getState
- let meta = runF (stateMeta' st) st
- let Pandoc _ bs = B.doc $ runF blocks st
- eastAsianLineBreaks <- option False $
- True <$ guardEnabled Ext_east_asian_line_breaks
- reportLogMessages
- return $ (if eastAsianLineBreaks
- then bottomUp softBreakFilter
- else id) $ Pandoc meta bs
-
-softBreakFilter :: [Inline] -> [Inline]
-softBreakFilter (x:SoftBreak:y:zs) =
- case (stringify x, stringify y) of
- (xs@(_:_), (c:_))
- | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
- _ -> x:SoftBreak:y:zs
-softBreakFilter xs = xs
-
-referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
-referenceKey = try $ do
- pos <- getPosition
- skipNonindentSpaces
- (_,raw) <- reference
- char ':'
- skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL = liftM unwords $ many $ try $ do
- skipMany spaceChar
- notFollowedBy' referenceTitle
- notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
- notFollowedBy' (() <$ reference)
- many1 $ notFollowedBy space >> litChar
- let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
- src <- try betweenAngles <|> sourceURL
- tit <- option "" referenceTitle
- attr <- option nullAttr $ try $
- guardEnabled Ext_link_attributes >> skipSpaces >> attributes
- addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
- >> many (try $ spnl >> keyValAttr)
- blanklines
- let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
- target = (escapeURI $ trimr src, tit)
- st <- getState
- let oldkeys = stateKeys st
- let key = toKey raw
- case M.lookup key oldkeys of
- Just _ -> logMessage $ DuplicateLinkReference raw pos
- Nothing -> return ()
- updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
- return $ return mempty
-
-referenceTitle :: PandocMonad m => MarkdownParser m String
-referenceTitle = try $ do
- skipSpaces >> optional newline >> skipSpaces
- quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-
--- A link title in quotes
-quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
-quotedTitle c = try $ do
- char c
- notFollowedBy spaces
- let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
- let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
- let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
- unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
-
--- | PHP Markdown Extra style abbreviation key. Currently
--- we just skip them, since Pandoc doesn't have an element for
--- an abbreviation.
-abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
-abbrevKey = do
- guardEnabled Ext_abbreviations
- try $ do
- char '*'
- reference
- char ':'
- skipMany (satisfy (/= '\n'))
- blanklines
- return $ return mempty
-
-noteMarker :: PandocMonad m => MarkdownParser m String
-noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
-
-rawLine :: PandocMonad m => MarkdownParser m String
-rawLine = try $ do
- notFollowedBy blankline
- notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
- optional indentSpaces
- anyLine
-
-rawLines :: PandocMonad m => MarkdownParser m String
-rawLines = do
- first <- anyLine
- rest <- many rawLine
- return $ unlines (first:rest)
-
-noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-noteBlock = try $ do
- pos <- getPosition
- skipNonindentSpaces
- ref <- noteMarker
- char ':'
- optional blankline
- optional indentSpaces
- first <- rawLines
- rest <- many $ try $ blanklines >> indentSpaces >> rawLines
- let raw = unlines (first:rest) ++ "\n"
- optional blanklines
- parsed <- parseFromString parseBlocks raw
- let newnote = (ref, parsed)
- oldnotes <- stateNotes' <$> getState
- case lookup ref oldnotes of
- Just _ -> logMessage $ DuplicateNoteReference ref pos
- Nothing -> return ()
- updateState $ \s -> s { stateNotes' = newnote : oldnotes }
- return mempty
-
---
--- parsing blocks
---
-
-parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
-parseBlocks = mconcat <$> manyTill block eof
-
-block :: PandocMonad m => MarkdownParser m (F Blocks)
-block = do
- pos <- getPosition
- res <- choice [ mempty <$ blanklines
- , codeBlockFenced
- , yamlMetaBlock
- -- note: bulletList needs to be before header because of
- -- the possibility of empty list items: -
- , bulletList
- , header
- , lhsCodeBlock
- , divHtml
- , htmlBlock
- , table
- , codeBlockIndented
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
- , rawTeXBlock
- , lineBlock
- , blockQuote
- , hrule
- , orderedList
- , definitionList
- , noteBlock
- , referenceKey
- , abbrevKey
- , para
- , plain
- ] <?> "block"
- report $ ParsingTrace
- (take 60 $ show $ B.toList $ runF res defaultParserState) pos
- return res
-
---
--- header blocks
---
-
-header :: PandocMonad m => MarkdownParser m (F Blocks)
-header = setextHeader <|> atxHeader <?> "header"
-
-atxChar :: PandocMonad m => MarkdownParser m Char
-atxChar = do
- exts <- getOption readerExtensions
- return $ if extensionEnabled Ext_literate_haskell exts
- then '='
- else '#'
-
-atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
-atxHeader = try $ do
- level <- atxChar >>= many1 . char >>= return . length
- notFollowedBy $ guardEnabled Ext_fancy_lists >>
- (char '.' <|> char ')') -- this would be a list
- skipSpaces
- (text, raw) <- withRaw $
- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
- attr <- atxClosing
- attr' <- registerHeader attr (runF text defaultParserState)
- guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw attr'
- return $ B.headerWith attr' level <$> text
-
-atxClosing :: PandocMonad m => MarkdownParser m Attr
-atxClosing = try $ do
- attr' <- option nullAttr
- (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
- skipMany . char =<< atxChar
- skipSpaces
- attr <- option attr'
- (guardEnabled Ext_header_attributes >> attributes)
- blanklines
- return attr
-
-setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
-setextHeaderEnd = try $ do
- attr <- option nullAttr
- $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
- <|> (guardEnabled Ext_header_attributes >> attributes)
- blanklines
- return attr
-
-mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
-mmdHeaderIdentifier = do
- ident <- stripFirstAndLast . snd <$> reference
- skipSpaces
- return (ident,[],[])
-
-setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
-setextHeader = try $ do
- -- This lookahead prevents us from wasting time parsing Inlines
- -- unless necessary -- it gives a significant performance boost.
- lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- skipSpaces
- (text, raw) <- withRaw $
- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
- attr <- setextHeaderEnd
- underlineChar <- oneOf setextHChars
- many (char underlineChar)
- blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr' <- registerHeader attr (runF text defaultParserState)
- guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw attr'
- return $ B.headerWith attr' level <$> text
-
-registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
-registerImplicitHeader raw attr@(ident, _, _) = do
- let key = toKey $ "[" ++ raw ++ "]"
- updateState (\s -> s { stateHeaderKeys =
- M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
-
---
--- hrule block
---
-
-hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
-hrule = try $ do
- skipSpaces
- start <- satisfy isHruleChar
- count 2 (skipSpaces >> char start)
- skipMany (spaceChar <|> char start)
- newline
- optional blanklines
- return $ return B.horizontalRule
-
---
--- code blocks
---
-
-indentedLine :: PandocMonad m => MarkdownParser m String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
-
-blockDelimiter :: PandocMonad m
- => (Char -> Bool)
- -> Maybe Int
- -> ParserT [Char] st m Int
-blockDelimiter f len = try $ do
- c <- lookAhead (satisfy f)
- case len of
- Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
-
-attributes :: PandocMonad m => MarkdownParser m Attr
-attributes = try $ do
- char '{'
- spnl
- attrs <- many (attribute <* spnl)
- char '}'
- return $ foldl (\x f -> f x) nullAttr attrs
-
-attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
-attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
-
-identifier :: PandocMonad m => MarkdownParser m String
-identifier = do
- first <- letter
- rest <- many $ alphaNum <|> oneOf "-_:."
- return (first:rest)
-
-identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
-identifierAttr = try $ do
- char '#'
- result <- identifier
- return $ \(_,cs,kvs) -> (result,cs,kvs)
-
-classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
-classAttr = try $ do
- char '.'
- result <- identifier
- return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
-
-keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
-keyValAttr = try $ do
- key <- identifier
- char '='
- val <- enclosed (char '"') (char '"') litChar
- <|> enclosed (char '\'') (char '\'') litChar
- <|> many (escapedChar' <|> noneOf " \t\n\r}")
- return $ \(id',cs,kvs) ->
- case key of
- "id" -> (val,cs,kvs)
- "class" -> (id',cs ++ words val,kvs)
- _ -> (id',cs,kvs ++ [(key,val)])
-
-specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
-specialAttr = do
- char '-'
- return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-
-codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-codeBlockFenced = try $ do
- c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
- <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
- size <- blockDelimiter (== c) Nothing
- skipMany spaceChar
- attr <- option ([],[],[]) $
- try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
- blankline
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
- blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
-
--- correctly handle github language identifiers
-toLanguageId :: String -> String
-toLanguageId = map toLower . go
- where go "c++" = "cpp"
- go "objective-c" = "objectivec"
- go x = x
-
-codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
-codeBlockIndented = do
- contents <- many1 (indentedLine <|>
- try (do b <- blanklines
- l <- indentedLine
- return $ b ++ l))
- optional blanklines
- classes <- getOption readerIndentedCodeClasses
- return $ return $ B.codeBlockWith ("", classes, []) $
- stripTrailingNewlines $ concat contents
-
-lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lhsCodeBlock = do
- guardEnabled Ext_literate_haskell
- (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- (lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
- <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
- lhsCodeBlockInverseBird)
-
-lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
-lhsCodeBlockLaTeX = try $ do
- string "\\begin{code}"
- manyTill spaceChar newline
- contents <- many1Till anyChar (try $ string "\\end{code}")
- blanklines
- return $ stripTrailingNewlines contents
-
-lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
-lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-
-lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
-lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-
-lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
-lhsCodeBlockBirdWith c = try $ do
- pos <- getPosition
- when (sourceColumn pos /= 1) $ fail "Not in first column"
- lns <- many1 $ birdTrackLine c
- -- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
- else lns
- blanklines
- return $ intercalate "\n" lns'
-
-birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
-birdTrackLine c = try $ do
- char c
- -- allow html tags on left margin:
- when (c == '<') $ notFollowedBy letter
- anyLine
-
---
--- block quotes
---
-
-emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
-emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
-
-emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
-emailBlockQuote = try $ do
- emailBlockQuoteStart
- let emailLine = many $ nonEndline <|> try
- (endline >> notFollowedBy emailBlockQuoteStart >>
- return '\n')
- let emailSep = try (newline >> emailBlockQuoteStart)
- first <- emailLine
- rest <- many $ try $ emailSep >> emailLine
- let raw = first:rest
- newline <|> (eof >> return '\n')
- optional blanklines
- return raw
-
-blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
-blockQuote = do
- raw <- emailBlockQuote
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ B.blockQuote <$> contents
-
---
--- list blocks
---
-
-bulletListStart :: PandocMonad m => MarkdownParser m ()
-bulletListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
- skipNonindentSpaces
- notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
- satisfy isBulletListMarker
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- () <$ atMostSpaces (tabStop - (endpos - startpos))
-
-anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
-anyOrderedListStart = try $ do
- optional newline -- if preceded by a Plain block in a list context
- startpos <- sourceColumn <$> getPosition
- skipNonindentSpaces
- notFollowedBy $ string "p." >> spaceChar >> digit -- page number
- res <- do guardDisabled Ext_fancy_lists
- start <- many1 digit >>= safeRead
- char '.'
- return (start, DefaultStyle, DefaultDelim)
- <|> do (num, style, delim) <- anyOrderedListMarker
- -- if it could be an abbreviated first name,
- -- insist on more than one space
- when (delim == Period && (style == UpperAlpha ||
- (style == UpperRoman &&
- num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
- () <$ spaceChar
- return (num, style, delim)
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res
-
-listStart :: PandocMonad m => MarkdownParser m ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-
-listLine :: PandocMonad m => MarkdownParser m String
-listLine = try $ do
- notFollowedBy' (do indentSpaces
- many spaceChar
- listStart)
- notFollowedByHtmlCloser
- optional (() <$ indentSpaces)
- listLineCommon
-
-listLineCommon :: PandocMonad m => MarkdownParser m String
-listLineCommon = concat <$> manyTill
- ( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
- <|> liftM snd (htmlTag isCommentTag)
- <|> count 1 anyChar
- ) newline
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: PandocMonad m
- => MarkdownParser m a
- -> MarkdownParser m String
-rawListItem start = try $ do
- start
- first <- listLineCommon
- rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
- blanks <- many blankline
- return $ unlines (first:rest) ++ blanks
-
--- continuation of a list item - indented and separated by blankline
--- or (in compact lists) endline.
--- note: nested lists are parsed as continuations
-listContinuation :: PandocMonad m => MarkdownParser m String
-listContinuation = try $ do
- lookAhead indentSpaces
- result <- many1 listContinuationLine
- blanks <- many blankline
- return $ concat result ++ blanks
-
-notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
-notFollowedByHtmlCloser = do
- inHtmlBlock <- stateInHtmlBlock <$> getState
- case inHtmlBlock of
- Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
- Nothing -> return ()
-
-listContinuationLine :: PandocMonad m => MarkdownParser m String
-listContinuationLine = try $ do
- notFollowedBy blankline
- notFollowedBy' listStart
- notFollowedByHtmlCloser
- optional indentSpaces
- result <- anyLine
- return $ result ++ "\n"
-
-listItem :: PandocMonad m
- => MarkdownParser m a
- -> MarkdownParser m (F Blocks)
-listItem start = try $ do
- first <- rawListItem start
- continuations <- many listContinuation
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may contain various block elements:
- let raw = concat (first:continuations)
- contents <- parseFromString parseBlocks raw
- updateState (\st -> st {stateParserContext = oldContext})
- return contents
-
-orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
-orderedList = try $ do
- (start, style, delim) <- lookAhead anyOrderedListStart
- unless (style `elem` [DefaultStyle, Decimal, Example] &&
- delim `elem` [DefaultDelim, Period]) $
- guardEnabled Ext_fancy_lists
- when (style == Example) $ guardEnabled Ext_example_lists
- items <- fmap sequence $ many1 $ listItem
- ( try $ do
- optional newline -- if preceded by Plain block in a list
- startpos <- sourceColumn <$> getPosition
- skipNonindentSpaces
- res <- orderedListMarker style delim
- endpos <- sourceColumn <$> getPosition
- tabStop <- getOption readerTabStop
- lookAhead (newline <|> spaceChar)
- atMostSpaces (tabStop - (endpos - startpos))
- return res )
- start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
-
-bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
-bulletList = do
- items <- fmap sequence $ many1 $ listItem bulletListStart
- return $ B.bulletList <$> fmap compactify items
-
--- definition lists
-
-defListMarker :: PandocMonad m => MarkdownParser m ()
-defListMarker = do
- sps <- nonindentSpaces
- char ':' <|> char '~'
- tabStop <- getOption readerTabStop
- let remaining = tabStop - (length sps + 1)
- if remaining > 0
- then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
- else mzero
- return ()
-
-definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
-definitionListItem compact = try $ do
- rawLine' <- anyLine
- raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
- contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
- optional blanklines
- return $ liftM2 (,) term (sequence contents)
-
-defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
-defRawBlock compact = try $ do
- hasBlank <- option False $ blankline >> return True
- defListMarker
- firstline <- anyLine
- let dline = try
- ( do notFollowedBy blankline
- notFollowedByHtmlCloser
- if compact -- laziness not compatible with compact
- then () <$ indentSpaces
- else (() <$ indentSpaces)
- <|> notFollowedBy defListMarker
- anyLine )
- rawlines <- many dline
- cont <- liftM concat $ many $ try $ do
- trailing <- option "" blanklines
- ln <- indentSpaces >> notFollowedBy blankline >> anyLine
- lns <- many dline
- return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
- if hasBlank || not (null cont) then "\n\n" else ""
-
-definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
-definitionList = try $ do
- lookAhead (anyLine >>
- optional (blankline >> notFollowedBy (table >> return ())) >>
- -- don't capture table caption as def list!
- defListMarker)
- compactDefinitionList <|> normalDefinitionList
-
-compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
-compactDefinitionList = do
- guardEnabled Ext_compact_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem True
- return $ B.definitionList <$> fmap compactifyDL items
-
-normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
-normalDefinitionList = do
- guardEnabled Ext_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem False
- return $ B.definitionList <$> items
-
---
--- paragraph block
---
-
-para :: PandocMonad m => MarkdownParser m (F Blocks)
-para = try $ do
- exts <- getOption readerExtensions
- result <- trimInlinesF . mconcat <$> many1 inline
- option (B.plain <$> result)
- $ try $ do
- newline
- (blanklines >> return mempty)
- <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
- <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
- <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
- <|> (guardEnabled Ext_lists_without_preceding_blankline >>
- -- Avoid creating a paragraph in a nested list.
- notFollowedBy' inList >>
- () <$ lookAhead listStart)
- <|> do guardEnabled Ext_native_divs
- inHtmlBlock <- stateInHtmlBlock <$> getState
- case inHtmlBlock of
- Just "div" -> () <$
- lookAhead (htmlTag (~== TagClose "div"))
- _ -> mzero
- return $ do
- result' <- result
- case B.toList result' of
- [Image attr alt (src,tit)]
- | Ext_implicit_figures `extensionEnabled` exts ->
- -- the fig: at beginning of title indicates a figure
- return $ B.para $ B.singleton
- $ Image attr alt (src,'f':'i':'g':':':tit)
- _ -> return $ B.para result'
-
-plain :: PandocMonad m => MarkdownParser m (F Blocks)
-plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
-
---
--- raw html
---
-
-htmlElement :: PandocMonad m => MarkdownParser m String
-htmlElement = rawVerbatimBlock
- <|> strictHtmlBlock
- <|> liftM snd (htmlTag isBlockTag)
-
-htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-htmlBlock = do
- guardEnabled Ext_raw_html
- try (do
- (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
- (guard (t `elem` ["pre","style","script"]) >>
- (return . B.rawBlock "html") <$> rawVerbatimBlock)
- <|> (do guardEnabled Ext_markdown_attribute
- oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
- markdownAttribute <-
- case lookup "markdown" attrs of
- Just "0" -> False <$ updateState (\st -> st{
- stateMarkdownAttribute = False })
- Just _ -> True <$ updateState (\st -> st{
- stateMarkdownAttribute = True })
- Nothing -> return oldMarkdownAttribute
- res <- if markdownAttribute
- then rawHtmlBlocks
- else htmlBlock'
- updateState $ \st -> st{ stateMarkdownAttribute =
- oldMarkdownAttribute }
- return res)
- <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
- <|> htmlBlock'
-
-htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
-htmlBlock' = try $ do
- first <- htmlElement
- skipMany spaceChar
- optional blanklines
- return $ return $ B.rawBlock "html" first
-
-strictHtmlBlock :: PandocMonad m => MarkdownParser m String
-strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-
-rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
-rawVerbatimBlock = htmlInBalanced isVerbTag
- where isVerbTag (TagOpen "pre" _) = True
- isVerbTag (TagOpen "style" _) = True
- isVerbTag (TagOpen "script" _) = True
- isVerbTag _ = False
-
-rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-rawTeXBlock = do
- guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
- rawConTeXtEnvironment `sepEndBy1` blankline)
- spaces
- return $ return result
-
-rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
-rawHtmlBlocks = do
- (TagOpen tagtype _, raw) <- htmlTag isBlockTag
- -- try to find closing tag
- -- we set stateInHtmlBlock so that closing tags that can be either block or
- -- inline will not be parsed as inline tags
- oldInHtmlBlock <- stateInHtmlBlock <$> getState
- updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
- let closer = htmlTag (\x -> x ~== TagClose tagtype)
- contents <- mconcat <$> many (notFollowedBy' closer >> block)
- result <-
- (closer >>= \(_, rawcloser) -> return (
- return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
- contents <>
- return (B.rawBlock "html" rawcloser)))
- <|> return (return (B.rawBlock "html" raw) <> contents)
- updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
- return result
-
--- remove markdown="1" attribute
-stripMarkdownAttribute :: String -> String
-stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
- where filterAttrib (TagOpen t as) = TagOpen t
- [(k,v) | (k,v) <- as, k /= "markdown"]
- filterAttrib x = x
-
---
--- line block
---
-
-lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
- guardEnabled Ext_line_blocks
- lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
- return $ B.lineBlock <$> sequence lines'
-
---
--- Tables
---
-
--- Parse a dashed line with optional trailing spaces; return its length
--- and the length including trailing space.
-dashedLine :: PandocMonad m
- => Char
- -> ParserT [Char] st m (Int, Int)
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many spaceChar
- let lengthDashes = length dashes
- lengthSp = length sp
- return (lengthDashes, lengthDashes + lengthSp)
-
--- Parse a table header with dashed lines of '-' preceded by
--- one (or zero) line of text.
-simpleTableHeader :: PandocMonad m
- => Bool -- ^ Headerless table
- -> MarkdownParser m (F [Blocks], [Alignment], [Int])
-simpleTableHeader headless = try $ do
- rawContent <- if headless
- then return ""
- else anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
- -- If no header, calculate alignment on basis of first row of text
- rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
- if headless
- then lookAhead anyLine
- else return rawContent
- let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
- let rawHeads' = if headless
- then replicate (length dashes) ""
- else rawHeads
- heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
- $ map trim rawHeads'
- return (heads, aligns, indices)
-
--- Returns an alignment type for a table, based on a list of strings
--- (the rows of the column header) and a number (the length of the
--- dashed line under the rows.
-alignType :: [String]
- -> Int
- -> Alignment
-alignType [] _ = AlignDefault
-alignType strLst len =
- let nonempties = filter (not . null) $ map trimr strLst
- (leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
- [] -> (False, False)
- in case (leftSpace, rightSpace) of
- (True, False) -> AlignRight
- (False, True) -> AlignLeft
- (True, True) -> AlignCenter
- (False, False) -> AlignDefault
-
--- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: PandocMonad m => MarkdownParser m String
-tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
-
--- Parse a table separator - dashed line.
-tableSep :: PandocMonad m => MarkdownParser m Char
-tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-
--- Parse a raw line and split it into chunks by indices.
-rawTableLine :: PandocMonad m
- => [Int]
- -> MarkdownParser m [String]
-rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
- line <- many1Till anyChar newline
- return $ map trim $ tail $
- splitStringByIndices (init indices) line
-
--- Parse a table line and return a list of lists of blocks (columns).
-tableLine :: PandocMonad m
- => [Int]
- -> MarkdownParser m (F [Blocks])
-tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
-
--- Parse a multiline table row and return a list of blocks (columns).
-multilineRow :: PandocMonad m
- => [Int]
- -> MarkdownParser m (F [Blocks])
-multilineRow indices = do
- colLines <- many1 (rawTableLine indices)
- let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
-
--- Parses a table caption: inlines beginning with 'Table:'
--- and followed by blank lines.
-tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
- guardEnabled Ext_table_captions
- skipNonindentSpaces
- string ":" <|> string "Table:"
- trimInlinesF . mconcat <$> many1 inline <* blanklines
-
--- Parse a simple table with '---' header and one line per row.
-simpleTable :: PandocMonad m
- => Bool -- ^ Headerless table
- -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-simpleTable headless = do
- (aligns, _widths, heads', lines') <-
- tableWith (simpleTableHeader headless) tableLine
- (return ())
- (if headless then tableFooter else tableFooter <|> blanklines)
- -- Simple tables get 0s for relative column widths (i.e., use default)
- return (aligns, replicate (length aligns) 0, heads', lines')
-
--- Parse a multiline table: starts with row of '-' on top, then header
--- (which may be multiline), then the rows,
--- which may be multiline, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-multilineTable :: PandocMonad m
- => Bool -- ^ Headerless table
- -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
-
-multilineTableHeader :: PandocMonad m
- => Bool -- ^ Headerless table
- -> MarkdownParser m (F [Blocks], [Alignment], [Int])
-multilineTableHeader headless = try $ do
- unless headless $
- tableSep >> notFollowedBy blankline
- rawContent <- if headless
- then return $ repeat ""
- else many1 $ notFollowedBy tableSep >> anyLine
- initSp <- nonindentSpaces
- dashes <- many1 (dashedLine '-')
- newline
- let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
- rawHeadsList <- if headless
- then liftM (map (:[]) . tail .
- splitStringByIndices (init indices)) $ lookAhead anyLine
- else return $ transpose $ map
- (tail . splitStringByIndices (init indices))
- rawContent
- let aligns = zipWith alignType rawHeadsList lengths
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (unlines . map trim) rawHeadsList
- heads <- fmap sequence $
- mapM (parseFromString (mconcat <$> many plain)) $
- map trim rawHeads
- return (heads, aligns, indices)
-
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
-gridTable :: PandocMonad m => Bool -- ^ Headerless table
- -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow
- (gridTableSep '-') gridTableFooter
-
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitStringByIndices (init indices) $ trimr line
-
-gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment)
-gridPart ch = do
- leftColon <- option False (True <$ char ':')
- dashes <- many1 (char ch)
- rightColon <- option False (True <$ char ':')
- char '+'
- let lengthDashes = length dashes + (if leftColon then 1 else 0) +
- (if rightColon then 1 else 0)
- let alignment = case (leftColon, rightColon) of
- (True, True) -> AlignCenter
- (True, False) -> AlignLeft
- (False, True) -> AlignRight
- (False, False) -> AlignDefault
- return ((lengthDashes, lengthDashes + 1), alignment)
-
-gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
-
-removeFinalBar :: String -> String
-removeFinalBar =
- reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-
--- | Separator between rows of grid table.
-gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
--- | Parse header for a grid table.
-gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table
- -> MarkdownParser m (F [Blocks], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return []
- else many1 (try (char '|' >> anyLine))
- underDashes <- if headless
- then return dashes
- else gridDashedLines '='
- guard $ length dashes == length underDashes
- let lines' = map (snd . fst) underDashes
- let indices = scanl (+) 0 lines'
- let aligns = map snd underDashes
- let rawHeads = if headless
- then replicate (length underDashes) ""
- else map (unlines . map trim) $ transpose
- $ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
- return (heads, aligns, indices)
-
-gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String]
-gridTableRawLine indices = do
- char '|'
- line <- anyLine
- return (gridTableSplitLine indices line)
-
--- | Parse row of grid table.
-gridTableRow :: PandocMonad m => [Int]
- -> MarkdownParser m (F [Blocks])
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- | Parse footer for a grid table.
-gridTableFooter :: PandocMonad m => MarkdownParser m [Char]
-gridTableFooter = blanklines
-
-pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
-pipeBreak = try $ do
- nonindentSpaces
- openPipe <- (True <$ char '|') <|> return False
- first <- pipeTableHeaderPart
- rest <- many $ sepPipe *> pipeTableHeaderPart
- -- surrounding pipes needed for a one-column table:
- guard $ not (null rest && not openPipe)
- optional (char '|')
- blankline
- return $ unzip (first:rest)
-
-pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-pipeTable = try $ do
- nonindentSpaces
- lookAhead nonspaceChar
- (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
- let heads' = take (length aligns) <$> heads
- lines' <- many pipeTableRow
- let lines'' = map (take (length aligns) <$>) lines'
- let maxlength = maximum $
- map (\x -> length . stringify $ runF x def) (heads' : lines'')
- numColumns <- getOption readerColumns
- let widths = if maxlength > numColumns
- then map (\len ->
- fromIntegral (len + 1) / fromIntegral numColumns)
- seplengths
- else replicate (length aligns) 0.0
- return $ (aligns, widths, heads', sequence lines'')
-
-sepPipe :: PandocMonad m => MarkdownParser m ()
-sepPipe = try $ do
- char '|' <|> char '+'
- notFollowedBy blankline
-
--- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
-pipeTableRow = try $ do
- scanForPipe
- skipMany spaceChar
- openPipe <- (True <$ char '|') <|> return False
- -- split into cells
- let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
- <|> void (noneOf "|\n\r")
- let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
- parseFromString pipeTableCell
- cells <- cellContents `sepEndBy1` (char '|')
- -- surrounding pipes needed for a one-column table:
- guard $ not (length cells == 1 && not openPipe)
- blankline
- return $ sequence cells
-
-pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
-pipeTableCell = do
- result <- many inline
- if null result
- then return mempty
- else return $ B.plain . mconcat <$> sequence result
-
-pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
-pipeTableHeaderPart = try $ do
- skipMany spaceChar
- left <- optionMaybe (char ':')
- pipe <- many1 (char '-')
- right <- optionMaybe (char ':')
- skipMany spaceChar
- let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
- return $
- ((case (left,right) of
- (Nothing,Nothing) -> AlignDefault
- (Just _,Nothing) -> AlignLeft
- (Nothing,Just _) -> AlignRight
- (Just _,Just _) -> AlignCenter), len)
-
--- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT [Char] st m ()
-scanForPipe = do
- inp <- getInput
- case break (\c -> c == '\n' || c == '|') inp of
- (_,'|':_) -> return ()
- _ -> mzero
-
--- | Parse a table using 'headerParser', 'rowParser',
--- 'lineParser', and 'footerParser'. Variant of the version in
--- Text.Pandoc.Parsing.
-tableWith :: PandocMonad m
- => MarkdownParser m (F [Blocks], [Alignment], [Int])
- -> ([Int] -> MarkdownParser m (F [Blocks]))
- -> MarkdownParser m sep
- -> MarkdownParser m end
- -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (heads, aligns, indices) <- headerParser
- lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
- footerParser
- numColumns <- getOption readerColumns
- let widths = if (indices == [])
- then replicate (length aligns) 0.0
- else widthsFromIndices numColumns indices
- return $ (aligns, widths, heads, lines')
-
-table :: PandocMonad m => MarkdownParser m (F Blocks)
-table = try $ do
- frontCaption <- option Nothing (Just <$> tableCaption)
- (aligns, widths, heads, lns) <-
- try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
- try (guardEnabled Ext_multiline_tables >>
- multilineTable False) <|>
- try (guardEnabled Ext_simple_tables >>
- (simpleTable True <|> simpleTable False)) <|>
- try (guardEnabled Ext_multiline_tables >>
- multilineTable True) <|>
- try (guardEnabled Ext_grid_tables >>
- (gridTable False <|> gridTable True)) <?> "table"
- optional blanklines
- caption <- case frontCaption of
- Nothing -> option (return mempty) tableCaption
- Just c -> return c
- -- renormalize widths if greater than 100%:
- let totalWidth = sum widths
- let widths' = if totalWidth < 1
- then widths
- else map (/ totalWidth) widths
- return $ do
- caption' <- caption
- heads' <- heads
- lns' <- lns
- return $ B.table caption' (zip aligns widths') heads' lns'
-
---
--- inline
---
-
-inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
- , bareURL
- , str
- , endline
- , code
- , strongOrEmph
- , note
- , cite
- , bracketedSpan
- , link
- , image
- , math
- , strikeout
- , subscript
- , superscript
- , inlineNote -- after superscript because of ^[link](/foo)^
- , autoLink
- , spanHtml
- , rawHtmlInline
- , escapedChar
- , rawLaTeXInline'
- , exampleRef
- , smart
- , return . B.singleton <$> charRef
- , emoji
- , symbol
- , ltSign
- ] <?> "inline"
-
-escapedChar' :: PandocMonad m => MarkdownParser m Char
-escapedChar' = try $ do
- char '\\'
- (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
- <|> (guardEnabled Ext_angle_brackets_escapable >>
- oneOf "\\`*_{}[]()>#+-.!~\"<>")
- <|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
- <|> oneOf "\\`*_{}[]()>#+-.!~\""
-
-escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedChar = do
- result <- escapedChar'
- case result of
- ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- '\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
- _ -> return $ return $ B.str [result]
-
-ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
-ltSign = do
- guardDisabled Ext_raw_html
- <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
- char '<'
- return $ return $ B.str "<"
-
-exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
- guardEnabled Ext_example_lists
- char '@'
- lab <- many1 (alphaNum <|> oneOf "-_")
- return $ do
- st <- askF
- return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
-
-symbol :: PandocMonad m => MarkdownParser m (F Inlines)
-symbol = do
- result <- noneOf "<\\\n\t "
- <|> try (do lookAhead $ char '\\'
- notFollowedBy' (() <$ rawTeXBlock)
- char '\\')
- return $ return $ B.str [result]
-
--- parses inline code, between n `s and n `s
-code :: PandocMonad m => MarkdownParser m (F Inlines)
-code = try $ do
- starts <- many1 (char '`')
- skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
- (char '\n' >> notFollowedBy' blankline >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
- notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
- >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
-
-math :: PandocMonad m => MarkdownParser m (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
- (guardEnabled Ext_smart *> (return <$> apostrophe)
- <* notFollowedBy (space <|> satisfy isPunctuation))
-
--- Parses material enclosed in *s, **s, _s, or __s.
--- Designed to avoid backtracking.
-enclosure :: PandocMonad m
- => Char
- -> MarkdownParser m (F Inlines)
-enclosure c = do
- -- we can't start an enclosure with _ if after a string and
- -- the intraword_underscores extension is enabled:
- guardDisabled Ext_intraword_underscores
- <|> guard (c == '*')
- <|> (guard =<< notAfterString)
- cs <- many1 (char c)
- (return (B.str cs) <>) <$> whitespace
- <|> do
- case length cs of
- 3 -> three c
- 2 -> two c mempty
- 1 -> one c mempty
- _ -> return (return $ B.str cs)
-
-ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
-ender c n = try $ do
- count n (char c)
- guard (c == '*')
- <|> guardDisabled Ext_intraword_underscores
- <|> notFollowedBy alphaNum
-
--- Parse inlines til you hit one c or a sequence of two cs.
--- If one c, emit emph and then parse two.
--- If two cs, emit strong and then parse one.
--- Otherwise, emit ccc then the results.
-three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
-three c = do
- contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
- <|> return (return (B.str [c,c,c]) <> contents)
-
--- Parse inlines til you hit two c's, and emit strong.
--- If you never do hit two cs, emit ** plus inlines parsed.
-two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
-two c prefix' = do
- contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
- <|> return (return (B.str [c,c]) <> (prefix' <> contents))
-
--- Parse inlines til you hit a c, and emit emph.
--- If you never hit a c, emit * plus inlines parsed.
-one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
-one c prefix' = do
- contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
- <|> try (string [c,c] >>
- notFollowedBy (ender c 1) >>
- two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
- <|> return (return (B.str [c]) <> (prefix' <> contents))
-
-strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
-strongOrEmph = enclosure '*' <|> enclosure '_'
-
--- | Parses a list of inlines between start and end delimiters.
-inlinesBetween :: PandocMonad m
- => (Show b)
- => MarkdownParser m a
- -> MarkdownParser m b
- -> MarkdownParser m (F Inlines)
-inlinesBetween start end =
- (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace <* notFollowedBy' end
-
-strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
-strikeout = fmap B.strikeout <$>
- (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
- where strikeStart = string "~~" >> lookAhead nonspaceChar
- >> notFollowedBy (char '~')
- strikeEnd = try $ string "~~"
-
-superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
- guardEnabled Ext_superscript
- char '^'
- mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-
-subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
- guardEnabled Ext_subscript
- char '~'
- mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-
-whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
-whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
- where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
- regsp = skipMany spaceChar >> return B.space
-
-nonEndline :: PandocMonad m => ParserT [Char] st m Char
-nonEndline = satisfy (/='\n')
-
-str :: PandocMonad m => MarkdownParser m (F Inlines)
-str = do
- result <- many1 alphaNum
- updateLastStrPos
- let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if isSmart
- then case likelyAbbrev result of
- [] -> return $ return $ B.str result
- xs -> choice (map (\x ->
- try (string x >> oneOf " \n" >>
- lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
-
--- | if the string matches the beginning of an abbreviation (before
--- the first period, return strings that would finish the abbreviation.
-likelyAbbrev :: String -> [String]
-likelyAbbrev x =
- let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
- "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
- "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
- "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
- "ch.", "sec.", "cf.", "cp."]
- abbrPairs = map (break (=='.')) abbrevs
- in map snd $ filter (\(y,_) -> y == x) abbrPairs
-
--- an endline character that can be treated as a space, not a structural break
-endline :: PandocMonad m => MarkdownParser m (F Inlines)
-endline = try $ do
- newline
- notFollowedBy blankline
- -- parse potential list-starts differently if in a list:
- notFollowedBy (inList >> listStart)
- guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
- guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
- guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
- guardDisabled Ext_backtick_code_blocks <|>
- notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
- notFollowedByHtmlCloser
- (eof >> return mempty)
- <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
- <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
- <|> (skipMany spaceChar >> return (return B.softbreak))
-
---
--- links
---
-
--- a reference label for a link
-reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
-reference = do notFollowedBy' (string "[^") -- footnote reference
- withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-
-parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
-parenthesizedChars = do
- result <- charsInBalanced '(' ')' litChar
- return $ '(' : result ++ ")"
-
--- source for a link, with optional title
-source :: PandocMonad m => MarkdownParser m (String, String)
-source = do
- char '('
- skipSpaces
- let urlChunk =
- try parenthesizedChars
- <|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
- <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
- let sourceURL = (unwords . words . concat) <$> many urlChunk
- let betweenAngles = try $
- char '<' >> manyTill litChar (char '>')
- src <- try betweenAngles <|> sourceURL
- tit <- option "" $ try $ spnl >> linkTitle
- skipSpaces
- char ')'
- return (escapeURI $ trimr src, tit)
-
-linkTitle :: PandocMonad m => MarkdownParser m String
-linkTitle = quotedTitle '"' <|> quotedTitle '\''
-
-link :: PandocMonad m => MarkdownParser m (F Inlines)
-link = try $ do
- st <- getState
- guard $ stateAllowLinks st
- setState $ st{ stateAllowLinks = False }
- (lab,raw) <- reference
- setState $ st{ stateAllowLinks = True }
- regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-
-bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
- guardEnabled Ext_bracketed_spans
- (lab,_) <- reference
- attr <- attributes
- let (ident,classes,keyvals) = attr
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> lab
- _ -> return $ B.spanWith attr <$> lab
-
-regLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
- -> F Inlines
- -> MarkdownParser m (F Inlines)
-regLink constructor lab = try $ do
- (src, tit) <- source
- attr <- option nullAttr $
- guardEnabled Ext_link_attributes >> attributes
- return $ constructor attr src tit <$> lab
-
--- a link like [this][ref] or [this][] or [this]
-referenceLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
- -> (F Inlines, String)
- -> MarkdownParser m (F Inlines)
-referenceLink constructor (lab, raw) = do
- sp <- (True <$ lookAhead (char ' ')) <|> return False
- (_,raw') <- option (mempty, "") $
- lookAhead (try (guardEnabled Ext_citations >>
- spnl >> normalCite >> return (mempty, "")))
- <|>
- try (spnl >> reference)
- when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
- let labIsRef = raw' == "" || raw' == "[]"
- let key = toKey $ if labIsRef then raw else raw'
- parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
- implicitHeaderRefs <- option False $
- True <$ guardEnabled Ext_implicit_header_references
- let makeFallback = do
- parsedRaw' <- parsedRaw
- fallback' <- fallback
- return $ B.str "[" <> fallback' <> B.str "]" <>
- (if sp && not (null raw) then B.space else mempty) <>
- parsedRaw'
- return $ do
- keys <- asksF stateKeys
- case M.lookup key keys of
- Nothing ->
- if implicitHeaderRefs
- then do
- headerKeys <- asksF stateHeaderKeys
- case M.lookup key headerKeys of
- Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
- Nothing -> makeFallback
- else makeFallback
- Just ((src,tit), attr) -> constructor attr src tit <$> lab
-
-dropBrackets :: String -> String
-dropBrackets = reverse . dropRB . reverse . dropLB
- where dropRB (']':xs) = xs
- dropRB xs = xs
- dropLB ('[':xs) = xs
- dropLB xs = xs
-
-bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
- guardEnabled Ext_autolink_bare_uris
- getState >>= guard . stateAllowLinks
- (orig, src) <- uri <|> emailAddress
- notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
- return $ return $ B.link src "" (B.str orig)
-
-autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
-autoLink = try $ do
- getState >>= guard . stateAllowLinks
- char '<'
- (orig, src) <- uri <|> emailAddress
- -- in rare cases, something may remain after the uri parser
- -- is finished, because the uri parser tries to avoid parsing
- -- final punctuation. for example: in `<http://hi---there>`,
- -- the URI parser will stop before the dashes.
- extra <- fromEntities <$> manyTill nonspaceChar (char '>')
- attr <- option nullAttr $ try $
- guardEnabled Ext_link_attributes >> attributes
- return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
-
-image :: PandocMonad m => MarkdownParser m (F Inlines)
-image = try $ do
- char '!'
- (lab,raw) <- reference
- defaultExt <- getOption readerDefaultImageExtension
- let constructor attr' src = case takeExtension src of
- "" -> B.imageWith attr' (addExtension src defaultExt)
- _ -> B.imageWith attr' src
- regLink constructor lab <|> referenceLink constructor (lab,raw)
-
-note :: PandocMonad m => MarkdownParser m (F Inlines)
-note = try $ do
- guardEnabled Ext_footnotes
- ref <- noteMarker
- return $ do
- notes <- asksF stateNotes'
- case lookup ref notes of
- Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- -- process the note in a context that doesn't resolve
- -- notes, to avoid infinite looping with notes inside
- -- notes:
- let contents' = runF contents st{ stateNotes' = [] }
- return $ B.note contents'
-
-inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
- guardEnabled Ext_inline_notes
- char '^'
- contents <- inlinesInBalancedBrackets
- return $ B.note . B.para <$> contents
-
-rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
- guardEnabled Ext_raw_tex
- lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
-
-rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
-rawConTeXtEnvironment = try $ do
- string "\\start"
- completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
- (try $ string "\\stop" >> string completion)
- return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-
-inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
-inBrackets parser = do
- char '['
- contents <- many parser
- char ']'
- return $ "[" ++ contents ++ "]"
-
-spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
- guardEnabled Ext_native_spans
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
- contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
- let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- case lookup "style" keyvals of
- Just s | null ident && null classes &&
- map toLower (filter (`notElem` " \t;") s) ==
- "font-variant:small-caps"
- -> return $ B.smallcaps <$> contents
- _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
-
-divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
- guardEnabled Ext_native_divs
- (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
- -- we set stateInHtmlBlock so that closing tags that can be either block or
- -- inline will not be parsed as inline tags
- oldInHtmlBlock <- stateInHtmlBlock <$> getState
- updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
- bls <- option "" (blankline >> option "" blanklines)
- contents <- mconcat <$>
- many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
- closed <- option False (True <$ htmlTag (~== TagClose "div"))
- if closed
- then do
- updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
- let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
- else -- avoid backtracing
- return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
-
-rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
-rawHtmlInline = do
- guardEnabled Ext_raw_html
- inHtmlBlock <- stateInHtmlBlock <$> getState
- let isCloseBlockTag t = case inHtmlBlock of
- Just t' -> t ~== TagClose t'
- Nothing -> False
- mdInHtml <- option False $
- ( guardEnabled Ext_markdown_in_html_blocks
- <|> guardEnabled Ext_markdown_attribute
- ) >> return True
- (_,result) <- htmlTag $ if mdInHtml
- then (\x -> isInlineTag x &&
- not (isCloseBlockTag x))
- else not . isTextTag
- return $ return $ B.rawInline "html" result
-
--- Emoji
-
-emojiChars :: [Char]
-emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
-
-emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
- guardEnabled Ext_emoji
- char ':'
- emojikey <- many1 (oneOf emojiChars)
- char ':'
- case M.lookup emojikey emojis of
- Just s -> return (return (B.str s))
- Nothing -> mzero
-
--- Citations
-
-cite :: PandocMonad m => MarkdownParser m (F Inlines)
-cite = do
- guardEnabled Ext_citations
- citations <- textualCite
- <|> do (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
- return citations
-
-textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
-textualCite = try $ do
- (_, key) <- citeKey
- let first = Citation{ citationId = key
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
- mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
- case mbrest of
- Just (rest, raw) ->
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
- <$> rest
- Nothing ->
- (do
- (cs, raw) <- withRaw $ bareloc first
- let (spaces',raw') = span isSpace raw
- spc | null spaces' = mempty
- | otherwise = B.space
- lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
- fallback <- referenceLink B.linkWith (lab,raw')
- return $ do
- fallback' <- fallback
- cs' <- cs
- return $
- case B.toList fallback' of
- Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
- _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
- <|> return (do st <- askF
- return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] $ B.str $ '@':key)
-
-bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
-bareloc c = try $ do
- spnl
- char '['
- notFollowedBy $ char '^'
- suff <- suffix
- rest <- option (return []) $ try $ char ';' >> citeList
- spnl
- char ']'
- notFollowedBy $ oneOf "[("
- return $ do
- suff' <- suff
- rest' <- rest
- return $ c{ citationSuffix = B.toList suff' } : rest'
-
-normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
-normalCite = try $ do
- char '['
- spnl
- citations <- citeList
- spnl
- char ']'
- return citations
-
-suffix :: PandocMonad m => MarkdownParser m (F Inlines)
-suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- spnl
- rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
-prefix :: PandocMonad m => MarkdownParser m (F Inlines)
-prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-
-citeList :: PandocMonad m => MarkdownParser m (F [Citation])
-citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
-
-citation :: PandocMonad m => MarkdownParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
-
-smart :: PandocMonad m => MarkdownParser m (F Inlines)
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [apostrophe, dash, ellipses])
-
-singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline singleQuoteEnd
-
--- doubleQuoted will handle regular double-quoted sections, as well
--- as dialogues with an open double-quote without a close double-quote
--- in the same paragraph.
-doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
- doubleQuoteStart
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
deleted file mode 100644
index 14f9da9b6..000000000
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ /dev/null
@@ -1,677 +0,0 @@
-{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
--- RelaxedPolyRec needed for inlinesBetween on GHC < 7
-{-
- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.MediaWiki
- Copyright : Copyright (C) 2012-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of mediawiki text to 'Pandoc' document.
--}
-{-
-TODO:
-_ correctly handle tables within tables
-_ parse templates?
--}
-module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
-
-import Text.Pandoc.Definition
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import Data.Monoid ((<>))
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
-import Text.Pandoc.XML ( fromEntities )
-import Text.Pandoc.Parsing hiding ( nested )
-import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
-import Control.Monad
-import Data.List (intersperse, intercalate, isPrefixOf )
-import Text.HTML.TagSoup
-import Data.Sequence (viewl, ViewL(..), (<|))
-import qualified Data.Foldable as F
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import Data.Char (isDigit, isSpace)
-import Data.Maybe (fromMaybe)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, report)
-
--- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readMediaWiki opts s = do
- parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
- , mwMaxNestingLevel = 4
- , mwNextLinkNumber = 1
- , mwCategoryLinks = []
- , mwHeaderMap = M.empty
- , mwIdentifierList = Set.empty
- }
- (s ++ "\n")
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-data MWState = MWState { mwOptions :: ReaderOptions
- , mwMaxNestingLevel :: Int
- , mwNextLinkNumber :: Int
- , mwCategoryLinks :: [Inlines]
- , mwHeaderMap :: M.Map Inlines String
- , mwIdentifierList :: Set.Set String
- }
-
-type MWParser m = ParserT [Char] MWState m
-
-instance HasReaderOptions MWState where
- extractReaderOptions = mwOptions
-
-instance HasHeaderMap MWState where
- extractHeaderMap = mwHeaderMap
- updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st }
-
-instance HasIdentifierList MWState where
- extractIdentifierList = mwIdentifierList
- updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
-
---
--- auxiliary functions
---
-
--- This is used to prevent exponential blowups for things like:
--- ''a'''a''a'''a''a'''a''a'''a
-nested :: PandocMonad m => MWParser m a -> MWParser m a
-nested p = do
- nestlevel <- mwMaxNestingLevel `fmap` getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ mwMaxNestingLevel = nestlevel }
- return res
-
-specialChars :: [Char]
-specialChars = "'[]<=&*{}|\":\\"
-
-spaceChars :: [Char]
-spaceChars = " \n\t"
-
-sym :: PandocMonad m => String -> MWParser m ()
-sym s = () <$ try (string s)
-
-newBlockTags :: [String]
-newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
-
-isBlockTag' :: Tag String -> Bool
-isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
- t `notElem` eitherBlockOrInline
-isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
- t `notElem` eitherBlockOrInline
-isBlockTag' tag = isBlockTag tag
-
-isInlineTag' :: Tag String -> Bool
-isInlineTag' (TagComment _) = True
-isInlineTag' t = not (isBlockTag' t)
-
-eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
- "map", "area", "object"]
-
-htmlComment :: PandocMonad m => MWParser m ()
-htmlComment = () <$ htmlTag isCommentTag
-
-inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
-inlinesInTags tag = try $ do
- (_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
- then return mempty
- else trimInlines . mconcat <$>
- manyTill inline (htmlTag (~== TagClose tag))
-
-blocksInTags :: PandocMonad m => String -> MWParser m Blocks
-blocksInTags tag = try $ do
- (_,raw) <- htmlTag (~== TagOpen tag [])
- let closer = if tag == "li"
- then htmlTag (~== TagClose "li")
- <|> lookAhead (
- htmlTag (~== TagOpen "li" [])
- <|> htmlTag (~== TagClose "ol")
- <|> htmlTag (~== TagClose "ul"))
- else htmlTag (~== TagClose tag)
- if '/' `elem` raw -- self-closing tag
- then return mempty
- else mconcat <$> manyTill block closer
-
-charsInTags :: PandocMonad m => String -> MWParser m [Char]
-charsInTags tag = try $ do
- (_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
- then return ""
- else manyTill anyChar (htmlTag (~== TagClose tag))
-
---
--- main parser
---
-
-parseMediaWiki :: PandocMonad m => MWParser m Pandoc
-parseMediaWiki = do
- bs <- mconcat <$> many block
- spaces
- eof
- categoryLinks <- reverse . mwCategoryLinks <$> getState
- let categories = if null categoryLinks
- then mempty
- else B.para $ mconcat $ intersperse B.space categoryLinks
- return $ B.doc $ bs <> categories
-
---
--- block parsers
---
-
-block :: PandocMonad m => MWParser m Blocks
-block = do
- pos <- getPosition
- res <- mempty <$ skipMany1 blankline
- <|> table
- <|> header
- <|> hrule
- <|> orderedList
- <|> bulletList
- <|> definitionList
- <|> mempty <$ try (spaces *> htmlComment)
- <|> preformatted
- <|> blockTag
- <|> (B.rawBlock "mediawiki" <$> template)
- <|> para
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
- return res
-
-para :: PandocMonad m => MWParser m Blocks
-para = do
- contents <- trimInlines . mconcat <$> many1 inline
- if F.all (==Space) contents
- then return mempty
- else return $ B.para contents
-
-table :: PandocMonad m => MWParser m Blocks
-table = do
- tableStart
- styles <- option [] parseAttrs <* blankline
- let tableWidth = case lookup "width" styles of
- Just w -> fromMaybe 1.0 $ parseWidth w
- Nothing -> 1.0
- caption <- option mempty tableCaption
- optional rowsep
- hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!'))
- (cellspecs',hdr) <- unzip <$> tableRow
- let widths = map ((tableWidth *) . snd) cellspecs'
- let restwidth = tableWidth - sum widths
- let zerocols = length $ filter (==0.0) widths
- let defaultwidth = if zerocols == 0 || zerocols == length widths
- then 0.0
- else restwidth / fromIntegral zerocols
- let widths' = map (\w -> if w == 0 then defaultwidth else w) widths
- let cellspecs = zip (map fst cellspecs') widths'
- rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
- optional blanklines
- tableEnd
- let cols = length hdr
- let (headers,rows) = if hasheader
- then (hdr, rows')
- else (replicate cols mempty, hdr:rows')
- return $ B.table caption cellspecs headers rows
-
-parseAttrs :: PandocMonad m => MWParser m [(String,String)]
-parseAttrs = many1 parseAttr
-
-parseAttr :: PandocMonad m => MWParser m (String, String)
-parseAttr = try $ do
- skipMany spaceChar
- k <- many1 letter
- char '='
- v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
- <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
- return (k,v)
-
-tableStart :: PandocMonad m => MWParser m ()
-tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|"
-
-tableEnd :: PandocMonad m => MWParser m ()
-tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}"
-
-rowsep :: PandocMonad m => MWParser m ()
-rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
- optional parseAttr <* blanklines
-
-cellsep :: PandocMonad m => MWParser m ()
-cellsep = try $
- (guardColumnOne *> skipSpaces <*
- ( (char '|' <* notFollowedBy (oneOf "-}+"))
- <|> (char '!')
- )
- )
- <|> (() <$ try (string "||"))
- <|> (() <$ try (string "!!"))
-
-tableCaption :: PandocMonad m => MWParser m Inlines
-tableCaption = try $ do
- guardColumnOne
- skipSpaces
- sym "|+"
- optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces)
- (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
-
-tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
-tableRow = try $ skipMany htmlComment *> many tableCell
-
-tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
-tableCell = try $ do
- cellsep
- skipMany spaceChar
- attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
- notFollowedBy (char '|')
- skipMany spaceChar
- ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
- ((snd <$> withRaw table) <|> count 1 anyChar))
- bs <- parseFromString (mconcat <$> many block) ls
- let align = case lookup "align" attrs of
- Just "left" -> AlignLeft
- Just "right" -> AlignRight
- Just "center" -> AlignCenter
- _ -> AlignDefault
- let width = case lookup "width" attrs of
- Just xs -> fromMaybe 0.0 $ parseWidth xs
- Nothing -> 0.0
- return ((align, width), bs)
-
-parseWidth :: String -> Maybe Double
-parseWidth s =
- case reverse s of
- ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
- _ -> Nothing
-
-template :: PandocMonad m => MWParser m String
-template = try $ do
- string "{{"
- notFollowedBy (char '{')
- lookAhead $ letter <|> digit <|> char ':'
- let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
- contents <- manyTill chunk (try $ string "}}")
- return $ "{{" ++ concat contents ++ "}}"
-
-blockTag :: PandocMonad m => MWParser m Blocks
-blockTag = do
- (tag, _) <- lookAhead $ htmlTag isBlockTag'
- case tag of
- TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
- TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
- TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
- TagOpen "source" attrs -> syntaxhighlight "source" attrs
- TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
- charsInTags "haskell"
- TagOpen "gallery" _ -> blocksInTags "gallery"
- TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
- TagClose "p" -> mempty <$ htmlTag (~== tag)
- _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
-
-trimCode :: String -> String
-trimCode ('\n':xs) = stripTrailingNewlines xs
-trimCode xs = stripTrailingNewlines xs
-
-syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
-syntaxhighlight tag attrs = try $ do
- let mblang = lookup "lang" attrs
- let mbstart = lookup "start" attrs
- let mbline = lookup "line" attrs
- let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline
- let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
- contents <- charsInTags tag
- return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
-
-hrule :: PandocMonad m => MWParser m Blocks
-hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
-
-guardColumnOne :: PandocMonad m => MWParser m ()
-guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-
-preformatted :: PandocMonad m => MWParser m Blocks
-preformatted = try $ do
- guardColumnOne
- char ' '
- let endline' = B.linebreak <$ (try $ newline <* char ' ')
- let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
- let spToNbsp ' ' = '\160'
- spToNbsp x = x
- let nowiki' = mconcat . intersperse B.linebreak . map B.str .
- lines . fromEntities . map spToNbsp <$> try
- (htmlTag (~== TagOpen "nowiki" []) *>
- manyTill anyChar (htmlTag (~== TagClose "nowiki")))
- let inline' = whitespace' <|> endline' <|> nowiki'
- <|> (try $ notFollowedBy newline *> inline)
- contents <- mconcat <$> many1 inline'
- let spacesStr (Str xs) = all isSpace xs
- spacesStr _ = False
- if F.all spacesStr contents
- then return mempty
- else return $ B.para $ encode contents
-
-encode :: Inlines -> Inlines
-encode = B.fromList . normalizeCode . B.toList . walk strToCode
- where strToCode (Str s) = Code ("",[],[]) s
- strToCode Space = Code ("",[],[]) " "
- strToCode x = x
- normalizeCode [] = []
- normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
- normalizeCode $ (Code a1 (x ++ y)) : zs
- normalizeCode (x:xs) = x : normalizeCode xs
-
-header :: PandocMonad m => MWParser m Blocks
-header = try $ do
- guardColumnOne
- eqs <- many1 (char '=')
- let lev = length eqs
- guard $ lev <= 6
- contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
- attr <- registerHeader nullAttr contents
- return $ B.headerWith attr lev contents
-
-bulletList :: PandocMonad m => MWParser m Blocks
-bulletList = B.bulletList <$>
- ( many1 (listItem '*')
- <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
- optional (htmlTag (~== TagClose "ul"))) )
-
-orderedList :: PandocMonad m => MWParser m Blocks
-orderedList =
- (B.orderedList <$> many1 (listItem '#'))
- <|> try
- (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
- spaces
- items <- many (listItem '#' <|> li)
- optional (htmlTag (~== TagClose "ol"))
- let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
- return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
-
-definitionList :: PandocMonad m => MWParser m Blocks
-definitionList = B.definitionList <$> many1 defListItem
-
-defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks])
-defListItem = try $ do
- terms <- mconcat . intersperse B.linebreak <$> many defListTerm
- -- we allow dd with no dt, or dt with no dd
- defs <- if B.isNull terms
- then notFollowedBy
- (try $ skipMany1 (char ':') >> string "<math>") *>
- many1 (listItem ':')
- else many (listItem ':')
- return (terms, defs)
-
-defListTerm :: PandocMonad m => MWParser m Inlines
-defListTerm = char ';' >> skipMany spaceChar >> anyLine >>=
- parseFromString (trimInlines . mconcat <$> many inline)
-
-listStart :: PandocMonad m => Char -> MWParser m ()
-listStart c = char c *> notFollowedBy listStartChar
-
-listStartChar :: PandocMonad m => MWParser m Char
-listStartChar = oneOf "*#;:"
-
-anyListStart :: PandocMonad m => MWParser m Char
-anyListStart = char '*'
- <|> char '#'
- <|> char ':'
- <|> char ';'
-
-li :: PandocMonad m => MWParser m Blocks
-li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
- (firstParaToPlain <$> blocksInTags "li") <* spaces
-
-listItem :: PandocMonad m => Char -> MWParser m Blocks
-listItem c = try $ do
- extras <- many (try $ char c <* lookAhead listStartChar)
- if null extras
- then listItem' c
- else do
- skipMany spaceChar
- first <- concat <$> manyTill listChunk newline
- rest <- many
- (try $ string extras *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
- contents <- parseFromString (many1 $ listItem' c)
- (unlines (first : rest))
- case c of
- '*' -> return $ B.bulletList contents
- '#' -> return $ B.orderedList contents
- ':' -> return $ B.definitionList [(mempty, contents)]
- _ -> mzero
-
--- The point of this is to handle stuff like
--- * {{cite book
--- | blah
--- | blah
--- }}
--- * next list item
--- which seems to be valid mediawiki.
-listChunk :: PandocMonad m => MWParser m String
-listChunk = template <|> count 1 anyChar
-
-listItem' :: PandocMonad m => Char -> MWParser m Blocks
-listItem' c = try $ do
- listStart c
- skipMany spaceChar
- first <- concat <$> manyTill listChunk newline
- rest <- many (try $ char c *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
- parseFromString (firstParaToPlain . mconcat <$> many1 block)
- $ unlines $ first : rest
-
-firstParaToPlain :: Blocks -> Blocks
-firstParaToPlain contents =
- case viewl (B.unMany contents) of
- (Para xs) :< ys -> B.Many $ (Plain xs) <| ys
- _ -> contents
-
---
--- inline parsers
---
-
-inline :: PandocMonad m => MWParser m Inlines
-inline = whitespace
- <|> url
- <|> str
- <|> doubleQuotes
- <|> strong
- <|> emph
- <|> image
- <|> internalLink
- <|> externalLink
- <|> math
- <|> inlineTag
- <|> B.singleton <$> charRef
- <|> inlineHtml
- <|> (B.rawInline "mediawiki" <$> variable)
- <|> (B.rawInline "mediawiki" <$> template)
- <|> special
-
-str :: PandocMonad m => MWParser m Inlines
-str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
-
-math :: PandocMonad m => MWParser m Inlines
-math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
- <|> (B.math . trim <$> charsInTags "math")
- <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
- <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
- where dmStart = string "\\["
- dmEnd = try (string "\\]")
- mStart = string "\\("
- mEnd = try (string "\\)")
-
-variable :: PandocMonad m => MWParser m String
-variable = try $ do
- string "{{{"
- contents <- manyTill anyChar (try $ string "}}}")
- return $ "{{{" ++ contents ++ "}}}"
-
-inlineTag :: PandocMonad m => MWParser m Inlines
-inlineTag = do
- (tag, _) <- lookAhead $ htmlTag isInlineTag'
- case tag of
- TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
- TagOpen "nowiki" _ -> try $ do
- (_,raw) <- htmlTag (~== tag)
- if '/' `elem` raw
- then return mempty
- else B.text . fromEntities <$>
- manyTill anyChar (htmlTag (~== TagClose "nowiki"))
- TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
- *> optional blankline)
- TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
- TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
- TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
- TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
- TagOpen "code" _ -> encode <$> inlinesInTags "code"
- TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
- TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
- _ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
-
-special :: PandocMonad m => MWParser m Inlines
-special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
- oneOf specialChars)
-
-inlineHtml :: PandocMonad m => MWParser m Inlines
-inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
-
-whitespace :: PandocMonad m => MWParser m Inlines
-whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment)
- <|> B.softbreak <$ endline
-
-endline :: PandocMonad m => MWParser m ()
-endline = () <$ try (newline <*
- notFollowedBy spaceChar <*
- notFollowedBy newline <*
- notFollowedBy' hrule <*
- notFollowedBy tableStart <*
- notFollowedBy' header <*
- notFollowedBy anyListStart)
-
-imageIdentifiers :: PandocMonad m => [MWParser m ()]
-imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
- where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
- "Bild"]
-
-image :: PandocMonad m => MWParser m Inlines
-image = try $ do
- sym "[["
- choice imageIdentifiers
- fname <- addUnderscores <$> many1 (noneOf "|]")
- _ <- many imageOption
- dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px")
- <|> return []
- _ <- many imageOption
- let kvs = case dims of
- w:[] -> [("width", w)]
- w:(h:[]) -> [("width", w), ("height", h)]
- _ -> []
- let attr = ("", [], kvs)
- caption <- (B.str fname <$ sym "]]")
- <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
-
-imageOption :: PandocMonad m => MWParser m String
-imageOption = try $ char '|' *> opt
- where
- opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
- , "thumb", "upright", "left", "right"
- , "center", "none", "baseline", "sub"
- , "super", "top", "text-top", "middle"
- , "bottom", "text-bottom" ])
- <|> try (string "frame")
- <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
-
-collapseUnderscores :: String -> String
-collapseUnderscores [] = []
-collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
-collapseUnderscores (x:xs) = x : collapseUnderscores xs
-
-addUnderscores :: String -> String
-addUnderscores = collapseUnderscores . intercalate "_" . words
-
-internalLink :: PandocMonad m => MWParser m Inlines
-internalLink = try $ do
- sym "[["
- pagename <- unwords . words <$> many (noneOf "|]")
- label <- option (B.text pagename) $ char '|' *>
- ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
- -- the "pipe trick"
- -- [[Help:Contents|] -> "Contents"
- <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
- sym "]]"
- linktrail <- B.text <$> many letter
- let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
- if "Category:" `isPrefixOf` pagename
- then do
- updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
- return mempty
- else return link
-
-externalLink :: PandocMonad m => MWParser m Inlines
-externalLink = try $ do
- char '['
- (_, src) <- uri
- lab <- try (trimInlines . mconcat <$>
- (skipMany1 spaceChar *> manyTill inline (char ']')))
- <|> do char ']'
- num <- mwNextLinkNumber <$> getState
- updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
- return $ B.str $ show num
- return $ B.link src "" lab
-
-url :: PandocMonad m => MWParser m Inlines
-url = do
- (orig, src) <- uri
- return $ B.link src "" (B.str orig)
-
--- | Parses a list of inlines between start and end delimiters.
-inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
-inlinesBetween start end =
- (trimInlines . mconcat) <$> try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace <* notFollowedBy' end
-
-emph :: PandocMonad m => MWParser m Inlines
-emph = B.emph <$> nested (inlinesBetween start end)
- where start = sym "''" >> lookAhead nonspaceChar
- end = try $ notFollowedBy' (() <$ strong) >> sym "''"
-
-strong :: PandocMonad m => MWParser m Inlines
-strong = B.strong <$> nested (inlinesBetween start end)
- where start = sym "'''" >> lookAhead nonspaceChar
- end = try $ sym "'''"
-
-doubleQuotes :: PandocMonad m => MWParser m Inlines
-doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
- where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar
- closeDoubleQuote = try $ sym "\""
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
deleted file mode 100644
index 1953c0c83..000000000
--- a/src/Text/Pandoc/Readers/Native.hs
+++ /dev/null
@@ -1,71 +0,0 @@
-{-
-Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; Either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Native
- Copyright : Copyright (C) 2011-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of a string representation of a pandoc type (@Pandoc@,
-@[Block]@, @Block@, @[Inline]@, or @Inline@) to a @Pandoc@ document.
--}
-module Text.Pandoc.Readers.Native ( readNative ) where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared (safeRead)
-import Text.Pandoc.Options (ReaderOptions)
-
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error
-import Text.Pandoc.Class
-
--- | Read native formatted text and return a Pandoc document.
--- The input may be a full pandoc document, a block list, a block,
--- an inline list, or an inline. Thus, for example,
---
--- > Str "hi"
---
--- will be treated as if it were
---
--- > Pandoc nullMeta [Plain [Str "hi"]]
---
-readNative :: PandocMonad m
- => ReaderOptions
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
- Right doc -> return doc
- Left _ -> throwError $ PandocParseError "couldn't read native"
-
-readBlocks :: String -> Either PandocError [Block]
-readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
-
-readBlock :: String -> Either PandocError Block
-readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
-
-readInlines :: String -> Either PandocError [Inline]
-readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
-
-readInline :: String -> Either PandocError Inline
-readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s)
-
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
deleted file mode 100644
index cec64895c..000000000
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ /dev/null
@@ -1,103 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-module Text.Pandoc.Readers.OPML ( readOPML ) where
-import Data.Char (toUpper)
-import Text.Pandoc.Options
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
-import Data.Generics
-import Control.Monad.State
-import Data.Default
-import Text.Pandoc.Class (PandocMonad)
-
-type OPML m = StateT OPMLState m
-
-data OPMLState = OPMLState{
- opmlSectionLevel :: Int
- , opmlDocTitle :: Inlines
- , opmlDocAuthors :: [Inlines]
- , opmlDocDate :: Inlines
- } deriving Show
-
-instance Default OPMLState where
- def = OPMLState{ opmlSectionLevel = 0
- , opmlDocTitle = mempty
- , opmlDocAuthors = []
- , opmlDocDate = mempty
- }
-
-readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc
-readOPML _ inp = do
- (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp)
- return $
- setTitle (opmlDocTitle st') $
- setAuthors (opmlDocAuthors st') $
- setDate (opmlDocDate st') $
- doc $ mconcat bs
-
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
- where go :: [Content] -> [Content]
- go (Text (CData CDataRaw _ _):xs) = xs
- go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
- go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
- go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
- go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
- go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = maybe (map toUpper e) id (lookupEntity e)
-
--- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
-attrValue attr elt =
- case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
- Just z -> z
- Nothing -> ""
-
--- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
--- exceptT = either throwError return
-
-asHtml :: PandocMonad m => String -> OPML m Inlines
-asHtml s =
- (\(Pandoc _ bs) -> case bs of
- [Plain ils] -> fromList ils
- _ -> mempty) <$> (lift $ readHtml def s)
-
-asMarkdown :: PandocMonad m => String -> OPML m Blocks
-asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s)
-
-getBlocks :: PandocMonad m => Element -> OPML m Blocks
-getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
-
-parseBlock :: PandocMonad m => Content -> OPML m Blocks
-parseBlock (Elem e) =
- case qName (elName e) of
- "ownerName" -> mempty <$ modify (\st ->
- st{opmlDocAuthors = [text $ strContent e]})
- "dateModified" -> mempty <$ modify (\st ->
- st{opmlDocDate = text $ strContent e})
- "title" -> mempty <$ modify (\st ->
- st{opmlDocTitle = text $ strContent e})
- "outline" -> gets opmlSectionLevel >>= sect . (+1)
- "?xml" -> return mempty
- _ -> getBlocks e
- where sect n = do headerText <- asHtml $ attrValue "text" e
- noteBlocks <- asMarkdown $ attrValue "_note" e
- modify $ \st -> st{ opmlSectionLevel = n }
- bs <- getBlocks e
- modify $ \st -> st{ opmlSectionLevel = n - 1 }
- let headerText' = case map toUpper (attrValue "type" e) of
- "LINK" -> link
- (attrValue "url" e) "" headerText
- _ -> headerText
- return $ header n headerText' <> noteBlocks <> bs
-parseBlock _ = return mempty
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
deleted file mode 100644
index ac22f2c09..000000000
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ /dev/null
@@ -1,113 +0,0 @@
-{-# LANGUAGE PatternGuards #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Reader.Odt
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Entry point to the odt reader.
--}
-
-module Text.Pandoc.Readers.Odt ( readOdt ) where
-
-import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
-
-import qualified Data.ByteString.Lazy as B
-
-import System.FilePath
-
-import Control.Monad.Except (throwError)
-
-import Text.Pandoc.Class (PandocMonad)
-import qualified Text.Pandoc.Class as P
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import Text.Pandoc.Options
-import Text.Pandoc.MediaBag
-import qualified Text.Pandoc.UTF8 as UTF8
-
-import Text.Pandoc.Readers.Odt.ContentReader
-import Text.Pandoc.Readers.Odt.StyleReader
-
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Shared (filteredFilesFromArchive)
-
-readOdt :: PandocMonad m
- => ReaderOptions
- -> B.ByteString
- -> m Pandoc
-readOdt opts bytes = case readOdt' opts bytes of
- Right (doc, mb) -> do
- P.setMediaBag mb
- return doc
- Left e -> throwError e
-
---
-readOdt' :: ReaderOptions
- -> B.ByteString
- -> Either PandocError (Pandoc, MediaBag)
-readOdt' _ bytes = bytesToOdt bytes-- of
--- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag)
--- Left err -> Left err
-
---
-bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
-bytesToOdt bytes = case toArchiveOrFail bytes of
- Right archive -> archiveToOdt archive
- Left _ -> Left $ PandocParseError "Couldn't parse odt file."
-
---
-archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
- | Just contentEntry <- findEntryByPath "content.xml" archive
- , Just stylesEntry <- findEntryByPath "styles.xml" archive
- , Just contentElem <- entryToXmlElem contentEntry
- , Just stylesElem <- entryToXmlElem stylesEntry
- , Right styles <- chooseMax (readStylesAt stylesElem )
- (readStylesAt contentElem)
- , media <- filteredFilesFromArchive archive filePathIsOdtMedia
- , startState <- readerState styles media
- , Right pandocWithMedia <- runConverter' read_body
- startState
- contentElem
-
- = Right pandocWithMedia
-
- | otherwise
- -- Not very detailed, but I don't think more information would be helpful
- = Left $ PandocParseError "Couldn't parse odt file."
- where
- filePathIsOdtMedia :: FilePath -> Bool
- filePathIsOdtMedia fp =
- let (dir, _) = splitFileName fp
- in
- (dir == "Pictures/")
-
-
---
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
deleted file mode 100644
index b056f1ecc..000000000
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ /dev/null
@@ -1,253 +0,0 @@
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Arrows.State
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-An arrow that transports a state. It is in essence a more powerful version of
-the standard state monad. As it is such a simple extension, there are
-other version out there that do exactly the same.
-The implementation is duplicated, though, to add some useful features.
-Most of these might be implemented without access to innards, but it's much
-faster and easier to implement this way.
--}
-
-module Text.Pandoc.Readers.Odt.Arrows.State where
-
-import Prelude hiding ( foldr, foldl )
-
-import qualified Control.Category as Cat
-import Control.Arrow
-import Control.Monad
-
-import Data.Foldable
-import Data.Monoid
-
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-
-
-newtype ArrowState state a b = ArrowState
- { runArrowState :: (state, a) -> (state, b) }
-
--- | Constructor
-withState :: (state -> a -> (state, b)) -> ArrowState state a b
-withState = ArrowState . uncurry
-
--- | Constructor
-withState' :: ((state, a) -> (state, b)) -> ArrowState state a b
-withState' = ArrowState
-
--- | Constructor
-modifyState :: (state -> state ) -> ArrowState state a a
-modifyState = ArrowState . first
-
--- | Constructor
-ignoringState :: ( a -> b ) -> ArrowState state a b
-ignoringState = ArrowState . second
-
--- | Constructor
-fromState :: (state -> (state, b)) -> ArrowState state a b
-fromState = ArrowState . (.fst)
-
--- | Constructor
-extractFromState :: (state -> b ) -> ArrowState state x b
-extractFromState f = ArrowState $ \(state,_) -> (state, f state)
-
--- | Constructor
-withUnchangedState :: (state -> a -> b ) -> ArrowState state a b
-withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a)
-
--- | Constructor
-tryModifyState :: (state -> Either f state)
- -> ArrowState state a (Either f a)
-tryModifyState f = ArrowState $ \(state,a)
- -> (state,).Left ||| (,Right a) $ f state
-
-instance Cat.Category (ArrowState s) where
- id = ArrowState id
- arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1)
-
-instance Arrow (ArrowState state) where
- arr = ignoringState
- first a = ArrowState $ \(s,(aF,aS))
- -> second (,aS) $ runArrowState a (s,aF)
- second a = ArrowState $ \(s,(aF,aS))
- -> second (aF,) $ runArrowState a (s,aS)
-
-instance ArrowChoice (ArrowState state) where
- left a = ArrowState $ \(s,e) -> case e of
- Left l -> second Left $ runArrowState a (s,l)
- Right r -> (s, Right r)
- right a = ArrowState $ \(s,e) -> case e of
- Left l -> (s, Left l)
- Right r -> second Right $ runArrowState a (s,r)
-
-instance ArrowLoop (ArrowState state) where
- loop a = ArrowState $ \(s, x)
- -> let (s', (x', _d)) = runArrowState a (s, (x, _d))
- in (s', x')
-
-instance ArrowApply (ArrowState state) where
- app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b)
-
-
--- | Embedding of a state arrow in a state arrow with a different state type.
-switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y
-switchState there back a = ArrowState $ first there
- >>> runArrowState a
- >>> first back
-
--- | Lift a state arrow to modify the state of an arrow
--- with a different state type.
-liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x
-liftToState unlift a = modifyState $ unlift &&& id
- >>> runArrowState a
- >>> snd
-
--- | Switches the type of the state temporarily.
--- Drops the intermediate result state, behaving like the identity arrow,
--- save for side effects in the state.
-withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x
-withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst
-
--- | Switches the type of the state temporarily.
--- Returns the resulting sub-state.
-withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s'
-withSubState' unlift a = ArrowState $ runArrowState unlift
- >>> switch
- >>> runArrowState a
- >>> switch
- where switch (x,y) = (y,x)
-
--- | Switches the type of the state temporarily.
--- Drops the intermediate result state, behaving like a fallible
--- identity arrow, save for side effects in the state.
-withSubStateF :: ArrowState s x (Either f s')
- -> ArrowState s' s (Either f s )
- -> ArrowState s x (Either f x )
-withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a)
- >>^ spreadChoice
- >>^ fmap fst
-
--- | Switches the type of the state temporarily.
--- Returns the resulting sub-state.
-withSubStateF' :: ArrowState s x (Either f s')
- -> ArrowState s' s (Either f s )
- -> ArrowState s x (Either f s')
-withSubStateF' unlift a = ArrowState go
- where go p@(s,_) = tryRunning unlift
- ( tryRunning a (second Right) )
- p
- where tryRunning a' b v = case runArrowState a' v of
- (_ , Left f) -> (s, Left f)
- (x , Right y) -> b (y,x)
-
--- | Fold a state arrow through something 'Foldable'. Collect the results
--- in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
-foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
-foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f
- where a' x (s',m) = second (m <>) $ runArrowState a (s',x)
-
--- | Fold a state arrow through something 'Foldable'. Collect the results
--- in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
-foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
-foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f
- where a' (s',m) x = second (m <>) $ runArrowState a (s',x)
-
--- | Fold a fallible state arrow through something 'Foldable'. Collect the
--- results in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
--- If the iteration fails, the state will be reset to the initial one.
-foldS' :: (Foldable f, Monoid m)
- => ArrowState s x (Either e m)
- -> ArrowState s (f x) (Either e m)
-foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f
- where a' s x (s',Right m) = case runArrowState a (s',x) of
- (s'',Right m') -> (s'', Right (m <> m'))
- (_ ,Left e ) -> (s , Left e)
- a' _ _ e = e
-
--- | Fold a fallible state arrow through something 'Foldable'. Collect the
--- results in a 'Monoid'.
--- Intermediate form of a fold between one with "only" a 'Monoid'
--- and one with any function.
--- If the iteration fails, the state will be reset to the initial one.
-foldSL' :: (Foldable f, Monoid m)
- => ArrowState s x (Either e m)
- -> ArrowState s (f x) (Either e m)
-foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f
- where a' s (s',Right m) x = case runArrowState a (s',x) of
- (s'',Right m') -> (s'', Right (m <> m'))
- (_ ,Left e ) -> (s , Left e)
- a' _ e _ = e
-
--- | Fold a state arrow through something 'Foldable'. Collect the results in a
--- 'MonadPlus'.
-iterateS :: (Foldable f, MonadPlus m)
- => ArrowState s x y
- -> ArrowState s (f x) (m y)
-iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
- where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x)
-
--- | Fold a state arrow through something 'Foldable'. Collect the results in a
--- 'MonadPlus'.
-iterateSL :: (Foldable f, MonadPlus m)
- => ArrowState s x y
- -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
- where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x)
-
-
--- | Fold a fallible state arrow through something 'Foldable'.
--- Collect the results in a 'MonadPlus'.
--- If the iteration fails, the state will be reset to the initial one.
-iterateS' :: (Foldable f, MonadPlus m)
- => ArrowState s x (Either e y )
- -> ArrowState s (f x) (Either e (m y))
-iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f
- where a' s x (s',Right m) = case runArrowState a (s',x) of
- (s'',Right m') -> (s'',Right $ mplus m $ return m')
- (_ ,Left e ) -> (s ,Left e )
- a' _ _ e = e
-
--- | Fold a fallible state arrow through something 'Foldable'.
--- Collect the results in a 'MonadPlus'.
--- If the iteration fails, the state will be reset to the initial one.
-iterateSL' :: (Foldable f, MonadPlus m)
- => ArrowState s x (Either e y )
- -> ArrowState s (f x) (Either e (m y))
-iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f
- where a' s (s',Right m) x = case runArrowState a (s',x) of
- (s'',Right m') -> (s'',Right $ mplus m $ return m')
- (_ ,Left e ) -> (s ,Left e )
- a' _ e _ = e
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
deleted file mode 100644
index 218a85661..000000000
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ /dev/null
@@ -1,495 +0,0 @@
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Arrows.Utils
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Utility functions for Arrows (Kleisli monads).
-
-Some general notes on notation:
-
-* "^" is meant to stand for a pure function that is lifted into an arrow
-based on its usage for that purpose in "Control.Arrow".
-* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function
-with an equivalent return value.
-* "_" stands for the dropping of a value.
--}
-
--- We export everything
-module Text.Pandoc.Readers.Odt.Arrows.Utils where
-
-import Control.Arrow
-import Control.Monad ( join, MonadPlus(..) )
-
-import qualified Data.Foldable as F
-import Data.Monoid
-
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.Utils
-
-and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c')
-and2 = (&&&)
-
-and3 :: (Arrow a)
- => a b c0->a b c1->a b c2
- -> a b (c0,c1,c2 )
-and4 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3
- -> a b (c0,c1,c2,c3 )
-and5 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4
- -> a b (c0,c1,c2,c3,c4 )
-and6 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5
- -> a b (c0,c1,c2,c3,c4,c5 )
-and7 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6
- -> a b (c0,c1,c2,c3,c4,c5,c6 )
-and8 :: (Arrow a)
- => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7
- -> a b (c0,c1,c2,c3,c4,c5,c6,c7)
-
-and3 a b c = (and2 a b ) &&& c
- >>^ \((z,y ) , x) -> (z,y,x )
-and4 a b c d = (and3 a b c ) &&& d
- >>^ \((z,y,x ) , w) -> (z,y,x,w )
-and5 a b c d e = (and4 a b c d ) &&& e
- >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v )
-and6 a b c d e f = (and5 a b c d e ) &&& f
- >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u )
-and7 a b c d e f g = (and6 a b c d e f ) &&& g
- >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t )
-and8 a b c d e f g h = (and7 a b c d e f g) &&& h
- >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s)
-
-liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z
-liftA2 f a b = a &&& b >>^ uncurry f
-
-liftA3 :: (Arrow a) => (z->y->x -> r)
- -> a b z->a b y->a b x
- -> a b r
-liftA4 :: (Arrow a) => (z->y->x->w -> r)
- -> a b z->a b y->a b x->a b w
- -> a b r
-liftA5 :: (Arrow a) => (z->y->x->w->v -> r)
- -> a b z->a b y->a b x->a b w->a b v
- -> a b r
-liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r)
- -> a b z->a b y->a b x->a b w->a b v->a b u
- -> a b r
-liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r)
- -> a b z->a b y->a b x->a b w->a b v->a b u->a b t
- -> a b r
-liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r)
- -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s
- -> a b r
-
-liftA3 fun a b c = and3 a b c >>^ uncurry3 fun
-liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun
-liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun
-liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun
-liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun
-liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun
-
-liftA :: (Arrow a) => (y -> z) -> a b y -> a b z
-liftA fun a = a >>^ fun
-
-
--- | Duplicate a value to subsequently feed it into different arrows.
--- Can almost always be replaced with '(&&&)', 'keepingTheValue',
--- or even '(|||)'.
--- Aequivalent to
--- > returnA &&& returnA
-duplicate :: (Arrow a) => a b (b,b)
-duplicate = arr $ join (,)
-
--- | Lifts the combination of two values into an arrow.
-joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z
-joinOn = arr.uncurry
-
--- | Applies a function to the uncurried result-pair of an arrow-application.
--- (The %-symbol was chosen to evoke an association with pairs.)
-(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
-a >>% f = a >>^ uncurry f
-
--- | '(>>%)' with its arguments flipped
-(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
-(%<<) = flip (>>%)
-
--- | Precomposition with an uncurried function
-(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
-f %>> a = uncurry f ^>> a
-
--- | Precomposition with an uncurried function (right to left variant)
-(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
-(<<%) = flip (%>>)
-
-infixr 2 >>%, %<<, %>>, <<%
-
-
--- | Duplicate a value and apply an arrow to the second instance.
--- Aequivalent to
--- > \a -> duplicate >>> second a
--- or
--- > \a -> returnA &&& a
-keepingTheValue :: (Arrow a) => a b c -> a b (b,c)
-keepingTheValue a = returnA &&& a
-
--- | Duplicate a value and apply an arrow to the first instance.
--- Aequivalent to
--- > \a -> duplicate >>> first a
--- or
--- > \a -> a &&& returnA
-keepingTheValue' :: (Arrow a) => a b c -> a b (c,b)
-keepingTheValue' a = a &&& returnA
-
--- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'.
--- Actually, it's the more complex '(>=>)', because 'bind' alone does not
--- combine as nicely in arrow form.
--- The current implementation is not the most efficient one, because it can
--- not return directly if a 'Nothing' is encountered. That in turn follows
--- from the type system, as 'Nothing' has an "invisible" type parameter that
--- can not be dropped early.
---
--- Also, there probably is a way to generalize this to other monads
--- or applicatives, but I'm leaving that as an exercise to the reader.
--- I have a feeling there is a new Arrow-typeclass to be found that is less
--- restrictive than 'ArrowApply'. If it is already out there,
--- I have not seen it yet. ('ArrowPlus' for example is not general enough.)
-(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c)
-a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join
-
-infixr 2 >>>=
-
--- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required.
--- (But still different from a true bind)
-(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b)
-(>++<) = liftA2 mplus
-
--- | Left-compose with a pure function
-leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r)
-leftLift = left.arr
-
--- | Right-compose with a pure function
-rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r')
-rightLift = right.arr
-
-
-( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c')
-( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c')
-( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c')
-
-l ^+++ r = leftLift l >>> right r
-l +++^ r = left l >>> rightLift r
-l ^+++^ r = leftLift l >>> rightLift r
-
-infixr 2 ^+++, +++^, ^+++^
-
-( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d
-( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d
-( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d
-
-l ^||| r = arr l ||| r
-l |||^ r = l ||| arr r
-l ^|||^ r = arr l ||| arr r
-
-infixr 2 ^||| , |||^, ^|||^
-
-( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c')
-( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c')
-( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c')
-
-l ^&&& r = arr l &&& r
-l &&&^ r = l &&& arr r
-l ^&&&^ r = arr l &&& arr r
-
-infixr 3 ^&&&, &&&^, ^&&&^
-
-( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c')
-( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c')
-( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c')
-
-l ^*** r = arr l *** r
-l ***^ r = l *** arr r
-l ^***^ r = arr l *** arr r
-
-infixr 3 ^***, ***^, ^***^
-
--- | A version of
---
--- >>> \p -> arr (\x -> if p x the Right x else Left x)
---
--- but with p being an arrow
-choose :: (ArrowChoice a) => a b Bool -> a b (Either b b)
-choose checkValue = keepingTheValue checkValue >>^ select
- where select (x,True ) = Right x
- select (x,False ) = Left x
-
--- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@.
-choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r)
-choiceToMaybe = arr eitherToMaybe
-
--- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@.
-maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b)
-maybeToChoice = arr maybeToEither
-
--- | Lifts a constant value into an arrow
-returnV :: (Arrow a) => c -> a x c
-returnV = arr.const
-
--- | 'returnA' dropping everything
-returnA_ :: (Arrow a) => a _b ()
-returnA_ = returnV ()
-
--- | Wrapper for an arrow that can be evaluated im parallel. All
--- Arrows can be evaluated in parallel, as long as they return a
--- monoid.
-newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c }
- deriving (Eq, Ord, Show)
-
-instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where
- mempty = CoEval $ returnV mempty
- (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend
-
--- | Evaluates a collection of arrows in a parallel fashion.
---
--- This is in essence a fold of '(&&&)' over the collection,
--- so the actual execution order and parallelity depends on the
--- implementation of '(&&&)' in the arrow in question.
--- The default implementation of '(&&&)' for example keeps the
--- order as given in the collection.
---
--- This function can be seen as a generalization of
--- 'Control.Applicative.sequenceA' to arrows or as an alternative to
--- a fold with 'Control.Applicative.WrappedArrow', which
--- substitutes the monoid with function application.
---
-coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m
-coEval = evalParallelArrow . (F.foldMap CoEval)
-
--- | Defines Left as failure, Right as success
-type FallibleArrow a input failure success = a input (Either failure success)
-
-type ReFallibleArrow a failure success success'
- = FallibleArrow a (Either failure success) failure success'
-
--- | Wrapper for fallible arrows. Fallible arrows are all arrows that return
--- an Either value where left is a faliure and right is a success value.
-newtype AlternativeArrow a input failure success
- = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success }
-
-
-instance (ArrowChoice a, Monoid failure)
- => Monoid (AlternativeArrow a input failure success) where
- mempty = TryArrow $ returnV $ Left mempty
- (TryArrow a) `mappend` (TryArrow b)
- = TryArrow $ a &&& b
- >>^ \(a',~b')
- -> ( (\a'' -> left (mappend a'') b') ||| Right )
- a'
-
--- | Evaluates a collection of fallible arrows, trying each one in succession.
--- Left values are interpreted as failures, right values as successes.
---
--- The evaluation is stopped once an arrow succeeds.
--- Up to that point, all failures are collected in the failure-monoid.
--- Note that '()' is a monoid, and thus can serve as a failure-collector if
--- you are uninterested in the exact failures.
---
--- This is in essence a fold of '(&&&)' over the collection, enhanced with a
--- little bit of repackaging, so the actual execution order depends on the
--- implementation of '(&&&)' in the arrow in question.
--- The default implementation of '(&&&)' for example keeps the
--- order as given in the collection.
---
-tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure)
- => f (FallibleArrow a b failure success)
- -> FallibleArrow a b failure success
-tryArrows = evalAlternativeArrow . (F.foldMap TryArrow)
-
---
-liftSuccess :: (ArrowChoice a)
- => (success -> success')
- -> ReFallibleArrow a failure success success'
-liftSuccess = rightLift
-
---
-liftAsSuccess :: (ArrowChoice a)
- => a x success
- -> FallibleArrow a x failure success
-liftAsSuccess a = a >>^ Right
-
---
-asFallibleArrow :: (ArrowChoice a)
- => a x success
- -> FallibleArrow a x failure success
-asFallibleArrow a = a >>^ Right
-
--- | Raises an error into a 'ReFallibleArrow' if the arrow is already in
--- "error mode"
-liftError :: (ArrowChoice a, Monoid failure)
- => failure
- -> ReFallibleArrow a failure success success
-liftError e = leftLift (e <>)
-
--- | Raises an error into a 'FallibleArrow', droping both the arrow input
--- and any previously stored error value.
-_raiseA :: (ArrowChoice a)
- => failure
- -> FallibleArrow a x failure success
-_raiseA e = returnV (Left e)
-
--- | Raises an empty error into a 'FallibleArrow', droping both the arrow input
--- and any previously stored error value.
-_raiseAEmpty :: (ArrowChoice a, Monoid failure)
- => FallibleArrow a x failure success
-_raiseAEmpty = _raiseA mempty
-
--- | Raises an error into a 'ReFallibleArrow', possibly appending the new error
--- to an existing one
-raiseA :: (ArrowChoice a, Monoid failure)
- => failure
- -> ReFallibleArrow a failure success success
-raiseA e = arr $ Left.(either (<> e) (const e))
-
--- | Raises an empty error into a 'ReFallibleArrow'. If there already is an
--- error, nothing changes.
--- (Note that this function is only aequivalent to @raiseA mempty@ iff the
--- failure monoid follows the monoid laws.)
-raiseAEmpty :: (ArrowChoice a, Monoid failure)
- => ReFallibleArrow a failure success success
-raiseAEmpty = arr (fromRight (const mempty) >>> Left)
-
-
--- | Execute the second arrow if the first succeeds
-(>>?) :: (ArrowChoice a)
- => FallibleArrow a x failure success
- -> FallibleArrow a success failure success'
- -> FallibleArrow a x failure success'
-a >>? b = a >>> Left ^||| b
-
--- | Execute the lifted second arrow if the first succeeds
-(>>?^) :: (ArrowChoice a)
- => FallibleArrow a x failure success
- -> (success -> success')
- -> FallibleArrow a x failure success'
-a >>?^ f = a >>^ Left ^|||^ Right . f
-
--- | Execute the lifted second arrow if the first succeeds
-(>>?^?) :: (ArrowChoice a)
- => FallibleArrow a x failure success
- -> (success -> Either failure success')
- -> FallibleArrow a x failure success'
-a >>?^? b = a >>> Left ^|||^ b
-
--- | Execute the second arrow if the lifted first arrow succeeds
-(^>>?) :: (ArrowChoice a)
- => (x -> Either failure success)
- -> FallibleArrow a success failure success'
- -> FallibleArrow a x failure success'
-a ^>>? b = a ^>> Left ^||| b
-
--- | Execute the lifted second arrow if the lifted first arrow succeeds
-(^>>?^) :: (ArrowChoice a)
- => (x -> Either failure success)
- -> (success -> success')
- -> FallibleArrow a x failure success'
-a ^>>?^ f = arr $ a >>> right f
-
--- | Execute the lifted second arrow if the lifted first arrow succeeds
-(^>>?^?) :: (ArrowChoice a)
- => (x -> Either failure success)
- -> (success -> Either failure success')
- -> FallibleArrow a x failure success'
-a ^>>?^? f = a ^>> Left ^|||^ f
-
--- | Execute the second, non-fallible arrow if the first arrow succeeds
-(>>?!) :: (ArrowChoice a)
- => FallibleArrow a x failure success
- -> a success success'
- -> FallibleArrow a x failure success'
-a >>?! f = a >>> right f
-
----
-(>>?%) :: (ArrowChoice a)
- => FallibleArrow a x f (b,b')
- -> (b -> b' -> c)
- -> FallibleArrow a x f c
-a >>?% f = a >>?^ (uncurry f)
-
----
-(^>>?%) :: (ArrowChoice a)
- => (x -> Either f (b,b'))
- -> (b -> b' -> c)
- -> FallibleArrow a x f c
-a ^>>?% f = arr a >>?^ (uncurry f)
-
----
-(>>?%?) :: (ArrowChoice a)
- => FallibleArrow a x f (b,b')
- -> (b -> b' -> (Either f c))
- -> FallibleArrow a x f c
-a >>?%? f = a >>?^? (uncurry f)
-
-infixr 1 >>?, >>?^, >>?^?
-infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?!
-infixr 1 >>?%, ^>>?%, >>?%?
-
--- | Keep values that are Right, replace Left values by a constant.
-ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v
-ifFailedUse v = arr $ either (const v) id
-
--- | '(&&)' lifted into an arrow
-(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
-(<&&>) = liftA2 (&&)
-
--- | '(||)' lifted into an arrow
-(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
-(<||>) = liftA2 (||)
-
--- | An equivalent of '(&&)' in a fallible arrow
-(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s
- -> FallibleArrow a x f s'
- -> FallibleArrow a x f (s,s')
-(>&&<) = liftA2 chooseMin
-
--- | An equivalent of '(||)' in some forms of fallible arrows
-(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s
- -> FallibleArrow a x f s
- -> FallibleArrow a x f s
-(>||<) = liftA2 chooseMax
-
--- | An arrow version of a short-circuit (<|>)
-ifFailedDo :: (ArrowChoice a)
- => FallibleArrow a x f y
- -> FallibleArrow a x f y
- -> FallibleArrow a x f y
-ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right)
- where repackage (x , Left _) = Left x
- repackage (_ , Right y) = Right y
-
-infixr 4 <&&>, <||>, >&&<, >||<
-infixr 1 `ifFailedDo`
-
-
diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs
deleted file mode 100644
index 1f095bade..000000000
--- a/src/Text/Pandoc/Readers/Odt/Base.hs
+++ /dev/null
@@ -1,43 +0,0 @@
-{-# LANGUAGE PatternGuards #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Base
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Core types of the odt reader.
--}
-
-module Text.Pandoc.Readers.Odt.Base where
-
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Namespaces
-
-type OdtConverterState s = XMLConverterState Namespace s
-
-type XMLReader s a b = FallibleXMLConverter Namespace s a b
-
-type XMLReaderSafe s a b = XMLConverter Namespace s a b
-
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
deleted file mode 100644
index a1bd8cb59..000000000
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ /dev/null
@@ -1,929 +0,0 @@
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE RecordWildCards #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.ContentReader
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-The core of the odt reader that converts odt features into Pandoc types.
--}
-
-module Text.Pandoc.Readers.Odt.ContentReader
-( readerState
-, read_body
-) where
-
-import Control.Arrow
-import Control.Applicative hiding ( liftA, liftA2, liftA3 )
-
-import qualified Data.ByteString.Lazy as B
-import qualified Data.Map as M
-import Data.List ( find, intercalate )
-import Data.Maybe
-
-import qualified Text.XML.Light as XML
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Text.Pandoc.Shared
-
-import Text.Pandoc.Readers.Odt.Base
-import Text.Pandoc.Readers.Odt.Namespaces
-import Text.Pandoc.Readers.Odt.StyleReader
-
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.Utils
-
-import qualified Data.Set as Set
-
---------------------------------------------------------------------------------
--- State
---------------------------------------------------------------------------------
-
-type Anchor = String
-type Media = [(FilePath, B.ByteString)]
-
-data ReaderState
- = ReaderState { -- | A collection of styles read somewhere else.
- -- It is only queried here, not modified.
- styleSet :: Styles
- -- | A stack of the styles of parent elements.
- -- Used to look up inherited style properties.
- , styleTrace :: [Style]
- -- | Keeps track of the current depth in nested lists
- , currentListLevel :: ListLevel
- -- | Lists may provide their own style, but they don't have
- -- to. If they do not, the style of a parent list may be used
- -- or even a default list style from the paragraph style.
- -- This value keeps track of the closest list style there
- -- currently is.
- , currentListStyle :: Maybe ListStyle
- -- | A map from internal anchor names to "pretty" ones.
- -- The mapping is a purely cosmetic one.
- , bookmarkAnchors :: M.Map Anchor Anchor
- -- | A map of files / binary data from the archive
- , envMedia :: Media
- -- | Hold binary resources used in the document
- , odtMediaBag :: MediaBag
--- , sequences
--- , trackedChangeIDs
- }
- deriving ( Show )
-
-readerState :: Styles -> Media -> ReaderState
-readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty
-
---
-pushStyle' :: Style -> ReaderState -> ReaderState
-pushStyle' style state = state { styleTrace = style : styleTrace state }
-
---
-popStyle' :: ReaderState -> ReaderState
-popStyle' state = case styleTrace state of
- _:trace -> state { styleTrace = trace }
- _ -> state
-
---
-modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState)
-modifyListLevel f state = state { currentListLevel = f (currentListLevel state) }
-
---
-shiftListLevel :: ListLevel -> (ReaderState -> ReaderState)
-shiftListLevel diff = modifyListLevel (+ diff)
-
---
-swapCurrentListStyle :: Maybe ListStyle -> ReaderState
- -> (ReaderState, Maybe ListStyle)
-swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle }
- , currentListStyle state
- )
-
---
-lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor
-lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors
-
---
-putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState
-putPrettyAnchor ugly pretty state@ReaderState{..}
- = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors }
-
---
-usedAnchors :: ReaderState -> [Anchor]
-usedAnchors ReaderState{..} = M.elems bookmarkAnchors
-
-getMediaBag :: ReaderState -> MediaBag
-getMediaBag ReaderState{..} = odtMediaBag
-
-getMediaEnv :: ReaderState -> Media
-getMediaEnv ReaderState{..} = envMedia
-
-insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState
-insertMedia' (fp, bs) state@ReaderState{..}
- = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag }
-
---------------------------------------------------------------------------------
--- Reader type and associated tools
---------------------------------------------------------------------------------
-
-type OdtReader a b = XMLReader ReaderState a b
-
-type OdtReaderSafe a b = XMLReaderSafe ReaderState a b
-
--- | Extract something from the styles
-fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b
-fromStyles f = keepingTheValue
- (getExtraState >>^ styleSet)
- >>% f
-
---
-getStyleByName :: OdtReader StyleName Style
-getStyleByName = fromStyles lookupStyle >>^ maybeToChoice
-
---
-findStyleFamily :: OdtReader Style StyleFamily
-findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice
-
---
-lookupListStyle :: OdtReader StyleName ListStyle
-lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice
-
---
-switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle)
-switchCurrentListStyle = keepingTheValue getExtraState
- >>% swapCurrentListStyle
- >>> first setExtraState
- >>^ snd
-
---
-pushStyle :: OdtReaderSafe Style Style
-pushStyle = keepingTheValue (
- ( keepingTheValue getExtraState
- >>% pushStyle'
- )
- >>> setExtraState
- )
- >>^ fst
-
---
-popStyle :: OdtReaderSafe x x
-popStyle = keepingTheValue (
- getExtraState
- >>> arr popStyle'
- >>> setExtraState
- )
- >>^ fst
-
---
-getCurrentListLevel :: OdtReaderSafe _x ListLevel
-getCurrentListLevel = getExtraState >>^ currentListLevel
-
---
-updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString)
-updateMediaWithResource = keepingTheValue (
- (keepingTheValue getExtraState
- >>% insertMedia'
- )
- >>> setExtraState
- )
- >>^ fst
-
-lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
-lookupResource = proc target -> do
- state <- getExtraState -< ()
- case lookup target (getMediaEnv state) of
- Just bs -> returnV (target, bs) -<< ()
- Nothing -> returnV ("", B.empty) -< ()
-
-type AnchorPrefix = String
-
--- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
--- unique identifier but without assuming that the id should be for a header.
--- Second argument is a list of already used identifiers.
-uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
-uniqueIdentFrom baseIdent usedIdents =
- let numIdent n = baseIdent ++ "-" ++ show n
- in if baseIdent `elem` usedIdents
- then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
- Just x -> numIdent x
- Nothing -> baseIdent -- if we have more than 60,000, allow repeats
- else baseIdent
-
--- | First argument: basis for a new "pretty" anchor if none exists yet
--- Second argument: a key ("ugly" anchor)
--- Returns: saved "pretty" anchor or created new one
-getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor
-getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
- state <- getExtraState -< ()
- case lookupPrettyAnchor uglyAnchor state of
- Just prettyAnchor -> returnA -< prettyAnchor
- Nothing -> do
- let newPretty = uniqueIdentFrom baseIdent (usedAnchors state)
- modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty
-
--- | Input: basis for a new header anchor
--- Ouput: saved new anchor
-getHeaderAnchor :: OdtReaderSafe Inlines Anchor
-getHeaderAnchor = proc title -> do
- state <- getExtraState -< ()
- let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
- modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
-
-
---------------------------------------------------------------------------------
--- Working with styles
---------------------------------------------------------------------------------
-
---
-readStyleByName :: OdtReader _x (StyleName, Style)
-readStyleByName =
- findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE
- where
- liftE :: (StyleName, Fallible Style) -> Fallible (StyleName, Style)
- liftE (name, Right v) = Right (name, v)
- liftE (_, Left v) = Left v
-
---
-isStyleToTrace :: OdtReader Style Bool
-isStyleToTrace = findStyleFamily >>?^ (==FaText)
-
---
-withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines
-withNewStyle a = proc x -> do
- fStyle <- readStyleByName -< ()
- case fStyle of
- Right (styleName, _) | isCodeStyle styleName -> do
- inlines <- a -< x
- arr inlineCode -<< inlines
- Right (_, style) -> do
- mFamily <- arr styleFamily -< style
- fTextProps <- arr ( maybeToChoice
- . textProperties
- . styleProperties
- ) -< style
- case fTextProps of
- Right textProps -> do
- state <- getExtraState -< ()
- let triple = (state, textProps, mFamily)
- modifier <- arr modifierFromStyleDiff -< triple
- fShouldTrace <- isStyleToTrace -< style
- case fShouldTrace of
- Right shouldTrace -> do
- if shouldTrace
- then do
- pushStyle -< style
- inlines <- a -< x
- popStyle -< ()
- arr modifier -<< inlines
- else
- -- In case anything goes wrong
- a -< x
- Left _ -> a -< x
- Left _ -> a -< x
- Left _ -> a -< x
- where
- isCodeStyle :: StyleName -> Bool
- isCodeStyle "Source_Text" = True
- isCodeStyle _ = False
-
- inlineCode :: Inlines -> Inlines
- inlineCode = code . intercalate "" . map stringify . toList
-
-type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
-type InlineModifier = Inlines -> Inlines
-
--- | Given data about the local style changes, calculates how to modify
--- an instance of 'Inlines'
-modifierFromStyleDiff :: PropertyTriple -> InlineModifier
-modifierFromStyleDiff propertyTriple =
- composition $
- (getVPosModifier propertyTriple)
- : map (first ($ propertyTriple) >>> ifThen_else ignore)
- [ (hasEmphChanged , emph )
- , (hasChanged isStrong , strong )
- , (hasChanged strikethrough , strikeout )
- ]
- where
- ifThen_else else' (if',then') = if if' then then' else else'
-
- ignore = id :: InlineModifier
-
- getVPosModifier :: PropertyTriple -> InlineModifier
- getVPosModifier triple@(_,textProps,_) =
- let getVPos = Just . verticalPosition
- in case lookupPreviousValueM getVPos triple of
- Nothing -> ignore
- Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps)
-
- getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore
- getVPosModifier' ( _ , VPosSub ) = subscript
- getVPosModifier' ( _ , VPosSuper ) = superscript
- getVPosModifier' ( _ , _ ) = ignore
-
- hasEmphChanged :: PropertyTriple -> Bool
- hasEmphChanged = swing any [ hasChanged isEmphasised
- , hasChangedM pitch
- , hasChanged underline
- ]
-
- hasChanged property triple@(_, property -> newProperty, _) =
- maybe True (/=newProperty) (lookupPreviousValue property triple)
-
- hasChangedM property triple@(_, textProps,_) =
- fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple
-
- lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties)
-
- lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties)
-
- lookupPreviousStyleValue f (ReaderState{..},_,mFamily)
- = ( findBy f $ extendedStylePropertyChain styleTrace styleSet )
- <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily )
-
-
-type ParaModifier = Blocks -> Blocks
-
-_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int
-_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int
-_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5
-_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5
-
--- | Returns either 'id' or 'blockQuote' depending on the current indentation
-getParaModifier :: Style -> ParaModifier
-getParaModifier Style{..} | Just props <- paraProperties styleProperties
- , isBlockQuote (indentation props)
- (margin_left props)
- = blockQuote
- | otherwise
- = id
- where
- isBlockQuote mIndent mMargin
- | LengthValueMM indent <- mIndent
- , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
- = True
- | LengthValueMM margin <- mMargin
- , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
- = True
- | LengthValueMM indent <- mIndent
- , LengthValueMM margin <- mMargin
- = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
-
- | PercentValue indent <- mIndent
- , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
- = True
- | PercentValue margin <- mMargin
- , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
- = True
- | PercentValue indent <- mIndent
- , PercentValue margin <- mMargin
- = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
-
- | otherwise
- = False
-
---
-constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks
-constructPara reader = proc blocks -> do
- fStyle <- readStyleByName -< blocks
- case fStyle of
- Left _ -> reader -< blocks
- Right (styleName, _) | isTableCaptionStyle styleName -> do
- blocks' <- reader -< blocks
- arr tableCaptionP -< blocks'
- Right (_, style) -> do
- let modifier = getParaModifier style
- blocks' <- reader -< blocks
- arr modifier -<< blocks'
- where
- isTableCaptionStyle :: StyleName -> Bool
- isTableCaptionStyle "Table" = True
- isTableCaptionStyle _ = False
- tableCaptionP b = divWith ("", ["caption"], []) b
-
-type ListConstructor = [Blocks] -> Blocks
-
-getListConstructor :: ListLevelStyle -> ListConstructor
-getListConstructor ListLevelStyle{..} =
- case listLevelType of
- LltBullet -> bulletList
- LltImage -> bulletList
- LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat
- listNumberDelim = toListNumberDelim listItemPrefix
- listItemSuffix
- in orderedListWith (listItemStart, listNumberStyle, listNumberDelim)
- where
- toListNumberStyle LinfNone = DefaultStyle
- toListNumberStyle LinfNumber = Decimal
- toListNumberStyle LinfRomanLC = LowerRoman
- toListNumberStyle LinfRomanUC = UpperRoman
- toListNumberStyle LinfAlphaLC = LowerAlpha
- toListNumberStyle LinfAlphaUC = UpperAlpha
- toListNumberStyle (LinfString _) = Example
-
- toListNumberDelim Nothing (Just ".") = Period
- toListNumberDelim (Just "" ) (Just ".") = Period
- toListNumberDelim Nothing (Just ")") = OneParen
- toListNumberDelim (Just "" ) (Just ")") = OneParen
- toListNumberDelim (Just "(") (Just ")") = TwoParens
- toListNumberDelim _ _ = DefaultDelim
-
-
--- | Determines which style to use for a list, which level to use of that
--- style, and which type of list to create as a result of this information.
--- Then prepares the state for eventual child lists and constructs the list from
--- the results.
--- Two main cases are handled: The list may provide its own style or it may
--- rely on a parent list's style. I the former case the current style in the
--- state must be switched before and after the call to the child converter
--- while in the latter the child converter can be called directly.
--- If anything goes wrong, a default ordered-list-constructor is used.
-constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks
-constructList reader = proc x -> do
- modifyExtraState (shiftListLevel 1) -< ()
- listLevel <- getCurrentListLevel -< ()
- fStyleName <- findAttr NsText "style-name" -< ()
- case fStyleName of
- Right styleName -> do
- fListStyle <- lookupListStyle -< styleName
- case fListStyle of
- Right listStyle -> do
- fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
- case fLLS of
- Just listLevelStyle -> do
- oldListStyle <- switchCurrentListStyle -< Just listStyle
- blocks <- constructListWith listLevelStyle -<< x
- switchCurrentListStyle -< oldListStyle
- returnA -< blocks
- Nothing -> constructOrderedList -< x
- Left _ -> constructOrderedList -< x
- Left _ -> do
- state <- getExtraState -< ()
- mListStyle <- arr currentListStyle -< state
- case mListStyle of
- Just listStyle -> do
- fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
- case fLLS of
- Just listLevelStyle -> constructListWith listLevelStyle -<< x
- Nothing -> constructOrderedList -< x
- Nothing -> constructOrderedList -< x
- where
- constructOrderedList =
- reader
- >>> modifyExtraState (shiftListLevel (-1))
- >>^ orderedList
- constructListWith listLevelStyle =
- reader
- >>> getListConstructor listLevelStyle
- ^>> modifyExtraState (shiftListLevel (-1))
-
---------------------------------------------------------------------------------
--- Readers
---------------------------------------------------------------------------------
-
-type ElementMatcher result = (Namespace, ElementName, OdtReader result result)
-
-type InlineMatcher = ElementMatcher Inlines
-
-type BlockMatcher = ElementMatcher Blocks
-
-
---
-matchingElement :: (Monoid e)
- => Namespace -> ElementName
- -> OdtReaderSafe e e
- -> ElementMatcher e
-matchingElement ns name reader = (ns, name, asResultAccumulator reader)
- where
- asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m)
- asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>)
-
---
-matchChildContent' :: (Monoid result)
- => [ElementMatcher result]
- -> OdtReaderSafe _x result
-matchChildContent' ls = returnV mempty >>> matchContent' ls
-
---
-matchChildContent :: (Monoid result)
- => [ElementMatcher result]
- -> OdtReaderSafe (result, XML.Content) result
- -> OdtReaderSafe _x result
-matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
-
-
---------------------------------------------
--- Matchers
---------------------------------------------
-
-----------------------
--- Basics
-----------------------
-
---
--- | Open Document allows several consecutive spaces if they are marked up
-read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines
-read_plain_text = fst ^&&& read_plain_text' >>% recover
- where
- -- fallible version
- read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines
- read_plain_text' = ( second ( arr extractText )
- >>^ spreadChoice >>?! second text
- )
- >>?% (<>)
- --
- extractText :: XML.Content -> Fallible String
- extractText (XML.Text cData) = succeedWith (XML.cdData cData)
- extractText _ = failEmpty
-
-read_text_seq :: InlineMatcher
-read_text_seq = matchingElement NsText "sequence"
- $ matchChildContent [] read_plain_text
-
-
--- specifically. I honor that, although the current implementation of '(<>)'
--- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein.
--- The rational is to be prepared for future modifications.
-read_spaces :: InlineMatcher
-read_spaces = matchingElement NsText "s" (
- readAttrWithDefault NsText "c" 1 -- how many spaces?
- >>^ fromList.(`replicate` Space)
- )
---
-read_line_break :: InlineMatcher
-read_line_break = matchingElement NsText "line-break"
- $ returnV linebreak
-
---
-read_span :: InlineMatcher
-read_span = matchingElement NsText "span"
- $ withNewStyle
- $ matchChildContent [ read_span
- , read_spaces
- , read_line_break
- , read_link
- , read_note
- , read_citation
- , read_bookmark
- , read_bookmark_start
- , read_reference_start
- , read_bookmark_ref
- , read_reference_ref
- ] read_plain_text
-
---
-read_paragraph :: BlockMatcher
-read_paragraph = matchingElement NsText "p"
- $ constructPara
- $ liftA para
- $ withNewStyle
- $ matchChildContent [ read_span
- , read_spaces
- , read_line_break
- , read_link
- , read_note
- , read_citation
- , read_bookmark
- , read_bookmark_start
- , read_reference_start
- , read_bookmark_ref
- , read_reference_ref
- , read_maybe_nested_img_frame
- , read_text_seq
- ] read_plain_text
-
-
-----------------------
--- Headers
-----------------------
-
---
-read_header :: BlockMatcher
-read_header = matchingElement NsText "h"
- $ proc blocks -> do
- level <- ( readAttrWithDefault NsText "outline-level" 1
- ) -< blocks
- children <- ( matchChildContent [ read_span
- , read_spaces
- , read_line_break
- , read_link
- , read_note
- , read_citation
- , read_bookmark
- , read_bookmark_start
- , read_reference_start
- , read_bookmark_ref
- , read_reference_ref
- , read_maybe_nested_img_frame
- ] read_plain_text
- ) -< blocks
- anchor <- getHeaderAnchor -< children
- let idAttr = (anchor, [], []) -- no classes, no key-value pairs
- arr (uncurry3 headerWith) -< (idAttr, level, children)
-
-----------------------
--- Lists
-----------------------
-
---
-read_list :: BlockMatcher
-read_list = matchingElement NsText "list"
--- $ withIncreasedListLevel
- $ constructList
--- $ liftA bulletList
- $ matchChildContent' [ read_list_item
- ]
---
-read_list_item :: ElementMatcher [Blocks]
-read_list_item = matchingElement NsText "list-item"
- $ liftA (compactify.(:[]))
- ( matchChildContent' [ read_paragraph
- , read_header
- , read_list
- ]
- )
-
-
-----------------------
--- Links
-----------------------
-
-read_link :: InlineMatcher
-read_link = matchingElement NsText "a"
- $ liftA3 link
- ( findAttrWithDefault NsXLink "href" "" )
- ( findAttrWithDefault NsOffice "title" "" )
- ( matchChildContent [ read_span
- , read_note
- , read_citation
- , read_bookmark
- , read_bookmark_start
- , read_reference_start
- , read_bookmark_ref
- , read_reference_ref
- ] read_plain_text )
-
-
--------------------------
--- Footnotes
--------------------------
-
-read_note :: InlineMatcher
-read_note = matchingElement NsText "note"
- $ liftA note
- $ matchChildContent' [ read_note_body ]
-
-read_note_body :: BlockMatcher
-read_note_body = matchingElement NsText "note-body"
- $ matchChildContent' [ read_paragraph ]
-
--------------------------
--- Citations
--------------------------
-
-read_citation :: InlineMatcher
-read_citation = matchingElement NsText "bibliography-mark"
- $ liftA2 cite
- ( liftA2 makeCitation
- ( findAttrWithDefault NsText "identifier" "" )
- ( readAttrWithDefault NsText "number" 0 )
- )
- ( matchChildContent [] read_plain_text )
- where
- makeCitation :: String -> Int -> [Citation]
- makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
-
-
-----------------------
--- Tables
-----------------------
-
---
-read_table :: BlockMatcher
-read_table = matchingElement NsTable "table"
- $ liftA simpleTable'
- $ matchChildContent' [ read_table_row
- ]
-
--- | A simple table without a caption or headers
--- | Infers the number of headers from rows
-simpleTable' :: [[Blocks]] -> Blocks
-simpleTable' [] = simpleTable [] []
-simpleTable' (x : rest) = simpleTable (fmap (const defaults) x) (x : rest)
- where defaults = fromList []
-
---
-read_table_row :: ElementMatcher [[Blocks]]
-read_table_row = matchingElement NsTable "table-row"
- $ liftA (:[])
- $ matchChildContent' [ read_table_cell
- ]
-
---
-read_table_cell :: ElementMatcher [Blocks]
-read_table_cell = matchingElement NsTable "table-cell"
- $ liftA (compactify.(:[]))
- $ matchChildContent' [ read_paragraph
- ]
-
-----------------------
--- Images
-----------------------
-
---
-read_maybe_nested_img_frame :: InlineMatcher
-read_maybe_nested_img_frame = matchingElement NsDraw "frame"
- $ proc blocks -> do
- img <- (findChild' NsDraw "image") -< ()
- case img of
- Just _ -> read_frame -< blocks
- Nothing -> matchChildContent' [ read_frame_text_box ] -< blocks
-
-read_frame :: OdtReaderSafe Inlines Inlines
-read_frame =
- proc blocks -> do
- w <- ( findAttr' NsSVG "width" ) -< ()
- h <- ( findAttr' NsSVG "height" ) -< ()
- titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks
- src <- matchChildContent' [ read_image_src ] -< blocks
- resource <- lookupResource -< src
- _ <- updateMediaWithResource -< resource
- alt <- (matchChildContent [] read_plain_text) -< blocks
- arr (uncurry4 imageWith ) -<
- (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt)
-
-image_attributes :: Maybe String -> Maybe String -> Attr
-image_attributes x y =
- ( "", [], (dim "width" x) ++ (dim "height" y))
- where
- dim _ (Just "") = []
- dim name (Just v) = [(name, v)]
- dim _ Nothing = []
-
-read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor)
-read_image_src = matchingElement NsDraw "image"
- $ proc _ -> do
- imgSrc <- findAttr NsXLink "href" -< ()
- case imgSrc of
- Right src -> returnV src -<< ()
- Left _ -> returnV "" -< ()
-
-read_frame_title :: InlineMatcher
-read_frame_title = matchingElement NsSVG "title"
- $ (matchChildContent [] read_plain_text)
-
-read_frame_text_box :: InlineMatcher
-read_frame_text_box = matchingElement NsDraw "text-box"
- $ proc blocks -> do
- paragraphs <- (matchChildContent' [ read_paragraph ]) -< blocks
- arr read_img_with_caption -< toList paragraphs
-
-read_img_with_caption :: [Block] -> Inlines
-read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) =
- singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
-read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) =
- singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
-read_img_with_caption ( (Para (_ : xs)) : ys) =
- read_img_with_caption ((Para xs) : ys)
-read_img_with_caption _ =
- mempty
-
-----------------------
--- Internal links
-----------------------
-
-_ANCHOR_PREFIX_ :: String
-_ANCHOR_PREFIX_ = "anchor"
-
---
-readAnchorAttr :: OdtReader _x Anchor
-readAnchorAttr = findAttr NsText "name"
-
--- | Beware: may fail
-findAnchorName :: OdtReader AnchorPrefix Anchor
-findAnchorName = ( keepingTheValue readAnchorAttr
- >>^ spreadChoice
- ) >>?! getPrettyAnchor
-
-
---
-maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix
- -> OdtReaderSafe Inlines Inlines
-maybeAddAnchorFrom anchorReader =
- keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem)
- >>>
- proc (inlines, fAnchorElem) -> do
- case fAnchorElem of
- Right anchorElem -> returnA -< anchorElem
- Left _ -> returnA -< inlines
- where
- toAnchorElem :: Anchor -> Inlines
- toAnchorElem anchorID = spanWith (anchorID, [], []) mempty
- -- no classes, no key-value pairs
-
---
-read_bookmark :: InlineMatcher
-read_bookmark = matchingElement NsText "bookmark"
- $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
-
---
-read_bookmark_start :: InlineMatcher
-read_bookmark_start = matchingElement NsText "bookmark-start"
- $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
-
---
-read_reference_start :: InlineMatcher
-read_reference_start = matchingElement NsText "reference-mark-start"
- $ maybeAddAnchorFrom readAnchorAttr
-
--- | Beware: may fail
-findAnchorRef :: OdtReader _x Anchor
-findAnchorRef = ( findAttr NsText "ref-name"
- >>?^ (_ANCHOR_PREFIX_,)
- ) >>?! getPrettyAnchor
-
-
---
-maybeInAnchorRef :: OdtReaderSafe Inlines Inlines
-maybeInAnchorRef = proc inlines -> do
- fRef <- findAnchorRef -< ()
- case fRef of
- Right anchor ->
- arr (toAnchorRef anchor) -<< inlines
- Left _ -> returnA -< inlines
- where
- toAnchorRef :: Anchor -> Inlines -> Inlines
- toAnchorRef anchor = link ('#':anchor) "" -- no title
-
---
-read_bookmark_ref :: InlineMatcher
-read_bookmark_ref = matchingElement NsText "bookmark-ref"
- $ maybeInAnchorRef
- <<< matchChildContent [] read_plain_text
-
---
-read_reference_ref :: InlineMatcher
-read_reference_ref = matchingElement NsText "reference-ref"
- $ maybeInAnchorRef
- <<< matchChildContent [] read_plain_text
-
-
-----------------------
--- Entry point
-----------------------
-
---read_plain_content :: OdtReaderSafe _x Inlines
---read_plain_content = strContent >>^ text
-
-read_text :: OdtReaderSafe _x Pandoc
-read_text = matchChildContent' [ read_header
- , read_paragraph
- , read_list
- , read_table
- ]
- >>^ doc
-
-post_process :: Pandoc -> Pandoc
-post_process (Pandoc m blocks) =
- Pandoc m (post_process' blocks)
-
-post_process' :: [Block] -> [Block]
-post_process' ((Table _ a w h r) : (Div ("", ["caption"], _) [Para inlines] ) : xs) =
- (Table inlines a w h r) : ( post_process' xs )
-post_process' bs = bs
-
-read_body :: OdtReader _x (Pandoc, MediaBag)
-read_body = executeIn NsOffice "body"
- $ executeIn NsOffice "text"
- $ liftAsSuccess
- $ proc inlines -> do
- txt <- read_text -< inlines
- state <- getExtraState -< ()
- returnA -< (post_process txt, getMediaBag state)
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
deleted file mode 100644
index 877443543..000000000
--- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
+++ /dev/null
@@ -1,260 +0,0 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Generic.Fallible
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Data types and utilities representing failure. Most of it is based on the
-"Either" type in its usual configuration (left represents failure).
-
-In most cases, the failure type is implied or required to be a "Monoid".
-
-The choice of "Either" instead of a custom type makes it easier to write
-compatible instances of "ArrowChoice".
--}
-
--- We export everything
-module Text.Pandoc.Readers.Odt.Generic.Fallible where
-
-import Control.Applicative
-import Control.Monad
-
-import qualified Data.Foldable as F
-import Data.Monoid ((<>))
-
--- | Default for now. Will probably become a class at some point.
-type Failure = ()
-
-type Fallible a = Either Failure a
-
-
--- | False -> Left (), True -> Right ()
-boolToEither :: Bool -> Fallible ()
-boolToEither False = Left ()
-boolToEither True = Right ()
-
--- | False -> Left (), True -> Right ()
-boolToChoice :: Bool -> Fallible ()
-boolToChoice False = Left ()
-boolToChoice True = Right ()
-
---
-maybeToEither :: Maybe a -> Fallible a
-maybeToEither (Just a) = Right a
-maybeToEither Nothing = Left ()
-
---
-eitherToMaybe :: Either _l a -> Maybe a
-eitherToMaybe (Left _) = Nothing
-eitherToMaybe (Right a) = Just a
-
--- | > untagEither === either id id
-untagEither :: Either a a -> a
-untagEither (Left a) = a
-untagEither (Right a) = a
-
--- | > fromLeft f === either f id
-fromLeft :: (a -> b) -> Either a b -> b
-fromLeft f (Left a) = f a
-fromLeft _ (Right b) = b
-
--- | > fromRight f === either id f
-fromRight :: (a -> b) -> Either b a -> b
-fromRight _ (Left b) = b
-fromRight f (Right a) = f a
-
--- | > recover a === fromLeft (const a) === either (const a) id
-recover :: a -> Either _f a -> a
-recover a (Left _) = a
-recover _ (Right a) = a
-
--- | I would love to use 'fail'. Alas, 'Monad.fail'...
-failWith :: failure -> Either failure _x
-failWith f = Left f
-
---
-failEmpty :: (Monoid failure) => Either failure _x
-failEmpty = failWith mempty
-
---
-succeedWith :: a -> Either _x a
-succeedWith = Right
-
---
-collapseEither :: Either failure (Either failure x)
- -> Either failure x
-collapseEither (Left f ) = Left f
-collapseEither (Right (Left f)) = Left f
-collapseEither (Right (Right x)) = Right x
-
--- | If either of the values represents an error, the result is a
--- (possibly combined) error. If both values represent a success,
--- both are returned.
-chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b')
-chooseMin = chooseMinWith (,)
-
--- | If either of the values represents an error, the result is a
--- (possibly combined) error. If both values represent a success,
--- a combination is returned.
-chooseMinWith :: (Monoid a) => (b -> b' -> c)
- -> Either a b
- -> Either a b'
- -> Either a c
-chooseMinWith (><) (Right a) (Right b) = Right $ a >< b
-chooseMinWith _ (Left a) (Left b) = Left $ a <> b
-chooseMinWith _ (Left a) _ = Left a
-chooseMinWith _ _ (Left b) = Left b
-
--- | If either of the values represents a non-error, the result is a
--- (possibly combined) non-error. If both values represent an error, an error
--- is returned.
-chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b
-chooseMax = chooseMaxWith (<>)
-
--- | If either of the values represents a non-error, the result is a
--- (possibly combined) non-error. If both values represent an error, an error
--- is returned.
-chooseMaxWith :: (Monoid a) => (b -> b -> b)
- -> Either a b
- -> Either a b
- -> Either a b
-chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b
-chooseMaxWith _ (Left a) (Left b) = Left $ a <> b
-chooseMaxWith _ (Right a) _ = Right a
-chooseMaxWith _ _ (Right b) = Right b
-
-
--- | Class of containers that can escalate contained 'Either's.
--- The word "Vector" is meant in the sense of a disease transmitter.
-class ChoiceVector v where
- spreadChoice :: v (Either f a) -> Either f (v a)
-
--- Let's do a few examples first
-
-instance ChoiceVector Maybe where
- spreadChoice (Just (Left f)) = Left f
- spreadChoice (Just (Right x)) = Right (Just x)
- spreadChoice Nothing = Right Nothing
-
-instance ChoiceVector (Either l) where
- spreadChoice (Right (Left f)) = Left f
- spreadChoice (Right (Right x)) = Right (Right x)
- spreadChoice (Left x ) = Right (Left x)
-
-instance ChoiceVector ((,) a) where
- spreadChoice (_, Left f) = Left f
- spreadChoice (x, Right y) = Right (x,y)
- -- Wasn't there a newtype somewhere with the elements flipped?
-
---
--- More instances later, first some discussion.
---
--- I'll have to freshen up on type system details to see how (or if) to do
--- something like
---
--- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where
--- > :
---
--- But maybe it would be even better to use something like
---
--- > class ChoiceVector v v' f | v -> v' f where
--- > spreadChoice :: v -> Either f v'
---
--- That way, more places in @v@ could spread the cheer, e.g.:
---
--- As before:
--- -- ( a , Either f b) (a , b) f
--- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where
--- > spreadChoice (_, Left f) = Left f
--- > spreadChoice (a, Right b) = Right (a,b)
---
--- But also:
--- -- ( Either f a , b) (a , b) f
--- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where
--- > spreadChoice (Right a,b) = Right (a,b)
--- > spreadChoice (Left f,_) = Left f
---
--- And maybe even:
--- -- ( Either f a , Either f b) (a , b) f
--- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where
--- > spreadChoice (Right a , Right b) = Right (a,b)
--- > spreadChoice (Left f , _ ) = Left f
--- > spreadChoice ( _ , Left f) = Left f
---
--- Of course that would lead to a lot of overlapping instances...
--- But I can't think of a different way. A selector function might help,
--- but not even a "Data.Traversable" is powerful enough for that.
--- But maybe someone has already solved all this with a lens library.
---
--- Well, it's an interesting academic question. But for practical purposes,
--- I have more than enough right now.
-
-instance ChoiceVector ((,,) a b) where
- spreadChoice (_,_, Left f) = Left f
- spreadChoice (a,b, Right x) = Right (a,b,x)
-
-instance ChoiceVector ((,,,) a b c) where
- spreadChoice (_,_,_, Left f) = Left f
- spreadChoice (a,b,c, Right x) = Right (a,b,c,x)
-
-instance ChoiceVector ((,,,,) a b c d) where
- spreadChoice (_,_,_,_, Left f) = Left f
- spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x)
-
-instance ChoiceVector (Const a) where
- spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types
-
--- | Fails on the first error
-instance ChoiceVector [] where
- spreadChoice = sequence -- using the monad instance of Either.
- -- Could be generalized to "Data.Traversable" - but why play
- -- with UndecidableInstances unless this is really needed.
-
--- | Wrapper for a list. While the normal list instance of 'ChoiceVector'
--- fails whenever it can, this type will never fail.
-newtype SuccessList a = SuccessList { collectNonFailing :: [a] }
- deriving ( Eq, Ord, Show )
-
-instance ChoiceVector SuccessList where
- spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
- where unTagRight (Right x) = (x:)
- unTagRight _ = id
-
--- | Like 'catMaybes', but for 'Either'.
-collectRights :: [Either _l r] -> [r]
-collectRights = collectNonFailing . untag . spreadChoice . SuccessList
- where untag = fromLeft (error "Unexpected Left")
-
--- | A version of 'collectRights' generalized to other containers. The
--- container must be both "reducible" and "buildable". Most general containers
--- should fullfill these requirements, but there is no single typeclass
--- (that I know of) for that.
--- Therefore, they are split between 'Foldable' and 'MonadPlus'.
--- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.)
-collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r
-collectRightsF = F.foldr unTagRight mzero
- where unTagRight (Right x) = mplus $ return x
- unTagRight _ = id
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
deleted file mode 100644
index 82ae3e20e..000000000
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ /dev/null
@@ -1,62 +0,0 @@
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Generic.Namespaces
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-A class containing a set of namespace identifiers. Used to convert between
-typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
--}
-
-module Text.Pandoc.Readers.Odt.Generic.Namespaces where
-
-import qualified Data.Map as M
-
---
-type NameSpaceIRI = String
-
---
-type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
-
---
-class (Eq nsID, Ord nsID) => NameSpaceID nsID where
-
- -- | Given a IRI, possibly update the map and return the id of the namespace.
- -- May fail if the namespace is unknown and the application does not
- -- allow unknown namespaces.
- getNamespaceID :: NameSpaceIRI
- -> NameSpaceIRIs nsID
- -> Maybe (NameSpaceIRIs nsID, nsID)
- -- | Given a namespace id, lookup its IRI. May be overriden for performance.
- getIRI :: nsID
- -> NameSpaceIRIs nsID
- -> Maybe NameSpaceIRI
- -- | The root element of an XML document has a namespace, too, and the
- -- "XML.Light-parser" is eager to remove the corresponding namespace
- -- attribute.
- -- As a result, at least this root namespace must be provided.
- getInitialIRImap :: NameSpaceIRIs nsID
-
- getIRI = M.lookup
- getInitialIRImap = M.empty
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
deleted file mode 100644
index afd7d616c..000000000
--- a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
+++ /dev/null
@@ -1,48 +0,0 @@
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Generic.SetMap
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-A map of values to sets of values.
--}
-
-module Text.Pandoc.Readers.Odt.Generic.SetMap where
-
-import qualified Data.Map as M
-import qualified Data.Set as S
-
-type SetMap k v = M.Map k (S.Set v)
-
-empty :: SetMap k v
-empty = M.empty
-
-fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v
-fromList = foldr (uncurry insert) empty
-
-insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v
-insert key value setMap = M.insertWith S.union key (S.singleton value) setMap
-
-union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v
-union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
deleted file mode 100644
index 6c10ed61d..000000000
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ /dev/null
@@ -1,171 +0,0 @@
-{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE ViewPatterns #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Reader.Odt.Generic.Utils
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-General utility functions for the odt reader.
--}
-
-module Text.Pandoc.Readers.Odt.Generic.Utils
-( uncurry3
-, uncurry4
-, uncurry5
-, uncurry6
-, uncurry7
-, uncurry8
-, swap
-, reverseComposition
-, bool
-, tryToRead
-, Lookupable(..)
-, readLookupables
-, readLookupable
-, readPercent
-, findBy
-, swing
-, composition
-) where
-
-import Control.Category ( Category, (>>>), (<<<) )
-import qualified Control.Category as Cat ( id )
-import Control.Monad ( msum )
-
-import qualified Data.Foldable as F ( Foldable, foldr )
-import Data.Maybe
-
-
--- | Aequivalent to
--- > foldr (.) id
--- where '(.)' are 'id' are the ones from "Control.Category"
--- and 'foldr' is the one from "Data.Foldable".
--- The noun-form was chosen to be consistend with 'sum', 'product' etc
--- based on the discussion at
--- <https://groups.google.com/forum/#!topic/haskell-cafe/VkOZM1zaHOI>
--- (that I was not part of)
-composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
-composition = F.foldr (<<<) Cat.id
-
--- | Aequivalent to
--- > foldr (flip (.)) id
--- where '(.)' are 'id' are the ones from "Control.Category"
--- and 'foldr' is the one from "Data.Foldable".
--- A reversed version of 'composition'.
-reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
-reverseComposition = F.foldr (>>>) Cat.id
-
--- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'.
--- Note that the first value is selected if the boolean value is 'False'.
--- That makes 'bool' consistent with the other two. Also, 'bool' now takes its
--- arguments in the exact opposite order compared to the normal if construct.
-bool :: a -> a -> Bool -> a
-bool x _ False = x
-bool _ x True = x
-
--- | This function often makes it possible to switch values with the functions
--- that are applied to them.
---
--- Examples:
--- > swing map :: [a -> b] -> a -> [b]
--- > swing any :: [a -> Bool] -> a -> Bool
--- > swing foldr :: b -> a -> [a -> b -> b] -> b
--- > swing scanr :: c -> a -> [a -> c -> c] -> c
--- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c]
--- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool)
---
--- Stolen from <https://wiki.haskell.org/Pointfree>
-swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d
-swing = flip.(.flip id)
--- swing f c a = f ($ a) c
-
-
--- | Alternative to 'read'/'reads'. The former of these throws errors
--- (nobody wants that) while the latter returns "to much" for simple purposes.
--- This function instead applies 'reads' and returns the first match (if any)
--- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
-
--- | A version of 'reads' that requires a '%' sign after the number
-readPercent :: ReadS Int
-readPercent s = [ (i,s') | (i , r ) <- reads s
- , ("%" , s') <- lex r
- ]
-
--- | Data that can be looked up.
--- This is mostly a utility to read data with kind *.
-class Lookupable a where
- lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- > readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- let result = lookup word lookupTable,
- isJust result,
- let Just a = result
- ]
-
--- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
- $ map ((`lookup` lookupTable).fst)
- $ lex s
-
-uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
-uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
-uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z
-uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z
-uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z
-uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z
-
-uncurry3 fun (a,b,c ) = fun a b c
-uncurry4 fun (a,b,c,d ) = fun a b c d
-uncurry5 fun (a,b,c,d,e ) = fun a b c d e
-uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f
-uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g
-uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h
-
-swap :: (a,b) -> (b,a)
-swap (a,b) = (b,a)
-
--- | A version of "Data.List.find" that uses a converter to a Maybe instance.
--- The returned value is the first which the converter returns in a 'Just'
--- wrapper.
-findBy :: (a -> Maybe b) -> [a] -> Maybe b
-findBy _ [] = Nothing
-findBy f ((f -> Just x):_ ) = Just x
-findBy f ( _:xs) = findBy f xs
-
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
deleted file mode 100644
index 8c03d1a09..000000000
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ /dev/null
@@ -1,1063 +0,0 @@
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE GADTs #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE RecordWildCards #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-A generalized XML parser based on stateful arrows.
-It might be sufficient to define this reader as a comonad, but there is
-not a lot of use in trying.
--}
-
-module Text.Pandoc.Readers.Odt.Generic.XMLConverter
-( ElementName
-, XMLConverterState
-, XMLConverter
-, FallibleXMLConverter
-, swapPosition
-, runConverter
-, runConverter''
-, runConverter'
-, runConverterF'
-, runConverterF
-, getCurrentElement
-, getExtraState
-, setExtraState
-, modifyExtraState
-, convertingExtraState
-, producingExtraState
-, lookupNSiri
-, lookupNSprefix
-, readNSattributes
-, elemName
-, elemNameIs
-, strContent
-, elContent
-, currentElem
-, currentElemIs
-, expectElement
-, elChildren
-, findChildren
-, filterChildren
-, filterChildrenName
-, findChild'
-, findChild
-, filterChild'
-, filterChild
-, filterChildName'
-, filterChildName
-, isSet
-, isSet'
-, isSetWithDefault
-, hasAttrValueOf'
-, failIfNotAttrValueOf
-, isThatTheAttrValue
-, searchAttrIn
-, searchAttrWith
-, searchAttr
-, lookupAttr
-, lookupAttr'
-, lookupAttrWithDefault
-, lookupDefaultingAttr
-, findAttr'
-, findAttr
-, findAttrWithDefault
-, readAttr
-, readAttr'
-, readAttrWithDefault
-, getAttr
--- , (>/<)
--- , (?>/<)
-, executeIn
-, collectEvery
-, withEveryL
-, withEvery
-, tryAll
-, tryAll'
-, IdXMLConverter
-, MaybeEConverter
-, ElementMatchConverter
-, MaybeCConverter
-, ContentMatchConverter
-, makeMatcherE
-, makeMatcherC
-, prepareMatchersE
-, prepareMatchersC
-, matchChildren
-, matchContent''
-, matchContent'
-, matchContent
-) where
-
-import Control.Applicative hiding ( liftA, liftA2 )
-import Control.Monad ( MonadPlus )
-import Control.Arrow
-
-import qualified Data.Map as M
-import qualified Data.Foldable as F
-import Data.Default
-import Data.Maybe
-
-import qualified Text.XML.Light as XML
-
-import Text.Pandoc.Readers.Odt.Arrows.State
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-
-import Text.Pandoc.Readers.Odt.Generic.Namespaces
-import Text.Pandoc.Readers.Odt.Generic.Utils
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-
---------------------------------------------------------------------------------
--- Basis types for readability
---------------------------------------------------------------------------------
-
---
-type ElementName = String
-type AttributeName = String
-type AttributeValue = String
-
---
-type NameSpacePrefix = String
-
---
-type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
-
---------------------------------------------------------------------------------
--- Main converter state
---------------------------------------------------------------------------------
-
--- GADT so some of the NameSpaceID restrictions can be deduced
-data XMLConverterState nsID extraState where
- XMLConverterState :: NameSpaceID nsID
- => { -- | A stack of parent elements. The top element is the current one.
- -- Arguably, a real Zipper would be better. But that is an
- -- optimization that can be made at a later time, e.g. when
- -- replacing Text.XML.Light.
- parentElements :: [XML.Element]
- -- | A map from internal namespace IDs to the namespace prefixes
- -- used in XML elements
- , namespacePrefixes :: NameSpacePrefixes nsID
- -- | A map from internal namespace IDs to namespace IRIs
- -- (Only necessary for matching namespace IDs and prefixes)
- , namespaceIRIs :: NameSpaceIRIs nsID
- -- | A place to put "something else". This feature is used heavily
- -- to keep the main code cleaner. More specifically, the main reader
- -- is divided into different stages. Each stage lifts something up
- -- here, which the next stage can then use. This could of course be
- -- generalized to a state-tree or used for the namespace IRIs. The
- -- border between states and values is an imaginary one, after all.
- -- But the separation as it is seems to be enough for now.
- , moreState :: extraState
- }
- -> XMLConverterState nsID extraState
-
---
-createStartState :: (NameSpaceID nsID)
- => XML.Element
- -> extraState
- -> XMLConverterState nsID extraState
-createStartState element extraState =
- XMLConverterState
- { parentElements = [element]
- , namespacePrefixes = M.empty
- , namespaceIRIs = getInitialIRImap
- , moreState = extraState
- }
-
--- | Functor over extra state
-instance Functor (XMLConverterState nsID) where
- fmap f ( XMLConverterState parents prefixes iRIs extraState )
- = XMLConverterState parents prefixes iRIs (f extraState)
-
---
-replaceExtraState :: extraState
- -> XMLConverterState nsID _x
- -> XMLConverterState nsID extraState
-replaceExtraState x s
- = fmap (const x) s
-
---
-currentElement :: XMLConverterState nsID extraState
- -> XML.Element
-currentElement state = head (parentElements state)
-
--- | Replace the current position by another, modifying the extra state
--- in the process
-swapPosition :: (extraState -> extraState')
- -> [XML.Element]
- -> XMLConverterState nsID extraState
- -> XMLConverterState nsID extraState'
-swapPosition f stack state
- = state { parentElements = stack
- , moreState = f (moreState state)
- }
-
--- | Replace the current position by another, modifying the extra state
--- in the process
-swapStack' :: XMLConverterState nsID extraState
- -> [XML.Element]
- -> ( XMLConverterState nsID extraState , [XML.Element] )
-swapStack' state stack
- = ( state { parentElements = stack }
- , parentElements state
- )
-
---
-pushElement :: XML.Element
- -> XMLConverterState nsID extraState
- -> XMLConverterState nsID extraState
-pushElement e state = state { parentElements = e:(parentElements state) }
-
--- | Pop the top element from the call stack, unless it is the last one.
-popElement :: XMLConverterState nsID extraState
- -> Maybe (XMLConverterState nsID extraState)
-popElement state
- | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es }
- | otherwise = Nothing
-
---------------------------------------------------------------------------------
--- Main type
---------------------------------------------------------------------------------
-
--- It might be a good idea to pack the converters in a GADT
--- Downside: data instead of type
--- Upside: 'Failure' could be made a parameter as well.
-
---
-type XMLConverter nsID extraState input output
- = ArrowState (XMLConverterState nsID extraState ) input output
-
-type FallibleXMLConverter nsID extraState input output
- = XMLConverter nsID extraState input (Fallible output)
-
---
-runConverter :: XMLConverter nsID extraState input output
- -> XMLConverterState nsID extraState
- -> input
- -> output
-runConverter converter state input = snd $ runArrowState converter (state,input)
-
---
-runConverter'' :: (NameSpaceID nsID)
- => XMLConverter nsID extraState (Fallible ()) output
- -> extraState
- -> XML.Element
- -> output
-runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) ()
-
-runConverter' :: (NameSpaceID nsID)
- => FallibleXMLConverter nsID extraState () success
- -> extraState
- -> XML.Element
- -> Fallible success
-runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) ()
-
---
-runConverterF' :: FallibleXMLConverter nsID extraState x y
- -> XMLConverterState nsID extraState
- -> Fallible x -> Fallible y
-runConverterF' a s e = runConverter (returnV e >>? a) s e
-
---
-runConverterF :: (NameSpaceID nsID)
- => FallibleXMLConverter nsID extraState XML.Element x
- -> extraState
- -> Fallible XML.Element -> Fallible x
-runConverterF a s = either failWith
- (\e -> runConverter a (createStartState e s) e)
-
---
-getCurrentElement :: XMLConverter nsID extraState x XML.Element
-getCurrentElement = extractFromState currentElement
-
---
-getExtraState :: XMLConverter nsID extraState x extraState
-getExtraState = extractFromState moreState
-
---
-setExtraState :: XMLConverter nsID extraState extraState extraState
-setExtraState = withState $ \state extra
- -> (replaceExtraState extra state , extra)
-
-
--- | Lifts a function to the extra state.
-modifyExtraState :: (extraState -> extraState)
- -> XMLConverter nsID extraState x x
-modifyExtraState = modifyState.fmap
-
-
--- | First sets the extra state to the new value. Then modifies the original
--- extra state with a converter that uses the new state. Finally, the
--- intermediate state is dropped and the extra state is lifted into the
--- state as it was at the beginning of the function.
--- As a result, exactly the extra state and nothing else is changed.
--- The resulting converter even behaves like an identity converter on the
--- value level.
---
--- (The -ing form is meant to be mnemonic in a sequence of arrows as in
--- convertingExtraState () converter >>> doOtherStuff)
---
-convertingExtraState :: extraState'
- -> FallibleXMLConverter nsID extraState' extraState extraState
- -> FallibleXMLConverter nsID extraState x x
-convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
- where
- setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v
- modifyWithA = keepingTheValue (moreState ^>> a)
- >>^ spreadChoice >>?% flip replaceExtraState
-
--- | First sets the extra state to the new value. Then produces a new
--- extra state with a converter that uses the new state. Finally, the
--- intermediate state is dropped and the extra state is lifted into the
--- state as it was at the beginning of the function.
--- As a result, exactly the extra state and nothing else is changed.
--- The resulting converter even behaves like an identity converter on the
--- value level.
---
--- Aequivalent to
---
--- > \v x a -> convertingExtraState v (returnV x >>> a)
---
--- (The -ing form is meant to be mnemonic in a sequence of arrows as in
--- producingExtraState () () producer >>> doOtherStuff)
---
-producingExtraState :: extraState'
- -> a
- -> FallibleXMLConverter nsID extraState' a extraState
- -> FallibleXMLConverter nsID extraState x x
-producingExtraState v x a = convertingExtraState v (returnV x >>> a)
-
-
---------------------------------------------------------------------------------
--- Work in namespaces
---------------------------------------------------------------------------------
-
--- | Arrow version of 'getIRI'
-lookupNSiri :: (NameSpaceID nsID)
- => nsID
- -> XMLConverter nsID extraState x (Maybe NameSpaceIRI)
-lookupNSiri nsID = extractFromState
- $ \state -> getIRI nsID $ namespaceIRIs state
-
---
-lookupNSprefix :: (NameSpaceID nsID)
- => nsID
- -> XMLConverter nsID extraState x (Maybe NameSpacePrefix)
-lookupNSprefix nsID = extractFromState
- $ \state -> M.lookup nsID $ namespacePrefixes state
-
--- | Extracts namespace attributes from the current element and tries to
--- update the current mapping accordingly
-readNSattributes :: (NameSpaceID nsID)
- => FallibleXMLConverter nsID extraState x ()
-readNSattributes = fromState $ \state -> maybe (state, failEmpty )
- ( , succeedWith ())
- (extractNSAttrs state )
- where
- extractNSAttrs :: (NameSpaceID nsID)
- => XMLConverterState nsID extraState
- -> Maybe (XMLConverterState nsID extraState)
- extractNSAttrs startState
- = foldl (\state d -> state >>= addNS d)
- (Just startState)
- nsAttribs
- where nsAttribs = mapMaybe readNSattr (XML.elAttribs element)
- element = currentElement startState
- readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri)
- = Just (name, iri)
- readNSattr _ = Nothing
- addNS (prefix, iri) state = fmap updateState
- $ getNamespaceID iri
- $ namespaceIRIs state
- where updateState (iris,nsID)
- = state { namespaceIRIs = iris
- , namespacePrefixes = M.insert nsID prefix
- $ namespacePrefixes state
- }
-
---------------------------------------------------------------------------------
--- Common namespace accessors
---------------------------------------------------------------------------------
-
--- | Given a namespace id and an element name, creates a 'XML.QName' for
--- internal use
-elemName :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState x XML.QName
-elemName nsID name = lookupNSiri nsID
- &&& lookupNSprefix nsID
- >>% XML.QName name
-
--- | Checks if a given element matches both a specified namespace id
--- and a specified element name
-elemNameIs :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState XML.Element Bool
-elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName
- where hasThatName e iri = let elName = XML.elName e
- in XML.qName elName == name
- && XML.qURI elName == iri
-
---------------------------------------------------------------------------------
--- General content
---------------------------------------------------------------------------------
-
---
-strContent :: XMLConverter nsID extraState x String
-strContent = getCurrentElement
- >>^ XML.strContent
-
---
-elContent :: XMLConverter nsID extraState x [XML.Content]
-elContent = getCurrentElement
- >>^ XML.elContent
-
---------------------------------------------------------------------------------
--- Current element
---------------------------------------------------------------------------------
-
---
-currentElem :: XMLConverter nsID extraState x (XML.QName)
-currentElem = getCurrentElement
- >>^ XML.elName
-
-currentElemIs :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState x Bool
-currentElemIs nsID name = getCurrentElement
- >>> elemNameIs nsID name
-
-
-
-{-
-currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>>
- (XML.qName >>^ (&&).(== name) )
- ^&&&^
- (XML.qIRI >>^ (==) )
- ) >>% (.)
- ) &&& lookupNSiri nsID >>% ($)
--}
-
---
-expectElement :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState x ()
-expectElement nsID name = currentElemIs nsID name
- >>^ boolToChoice
-
---------------------------------------------------------------------------------
--- Chilren
---------------------------------------------------------------------------------
-
---
-elChildren :: XMLConverter nsID extraState x [XML.Element]
-elChildren = getCurrentElement
- >>^ XML.elChildren
-
---
-findChildren :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState x [XML.Element]
-findChildren nsID name = elemName nsID name
- &&& getCurrentElement
- >>% XML.findChildren
-
---
-filterChildren :: (XML.Element -> Bool)
- -> XMLConverter nsID extraState x [XML.Element]
-filterChildren p = getCurrentElement
- >>^ XML.filterChildren p
-
---
-filterChildrenName :: (XML.QName -> Bool)
- -> XMLConverter nsID extraState x [XML.Element]
-filterChildrenName p = getCurrentElement
- >>^ XML.filterChildrenName p
-
---
-findChild' :: (NameSpaceID nsID)
- => nsID
- -> ElementName
- -> XMLConverter nsID extraState x (Maybe XML.Element)
-findChild' nsID name = elemName nsID name
- &&& getCurrentElement
- >>% XML.findChild
-
---
-findChild :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState x XML.Element
-findChild nsID name = findChild' nsID name
- >>> maybeToChoice
-
---
-filterChild' :: (XML.Element -> Bool)
- -> XMLConverter nsID extraState x (Maybe XML.Element)
-filterChild' p = getCurrentElement
- >>^ XML.filterChild p
-
---
-filterChild :: (XML.Element -> Bool)
- -> FallibleXMLConverter nsID extraState x XML.Element
-filterChild p = filterChild' p
- >>> maybeToChoice
-
---
-filterChildName' :: (XML.QName -> Bool)
- -> XMLConverter nsID extraState x (Maybe XML.Element)
-filterChildName' p = getCurrentElement
- >>^ XML.filterChildName p
-
---
-filterChildName :: (XML.QName -> Bool)
- -> FallibleXMLConverter nsID extraState x XML.Element
-filterChildName p = filterChildName' p
- >>> maybeToChoice
-
-
---------------------------------------------------------------------------------
--- Attributes
---------------------------------------------------------------------------------
-
---
-isSet :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> (Either Failure Bool)
- -> FallibleXMLConverter nsID extraState x Bool
-isSet nsID attrName deflt
- = findAttr' nsID attrName
- >>^ maybe deflt stringToBool
-
---
-isSet' :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x (Maybe Bool)
-isSet' nsID attrName = findAttr' nsID attrName
- >>^ (>>= stringToBool')
-
-isSetWithDefault :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> Bool
- -> XMLConverter nsID extraState x Bool
-isSetWithDefault nsID attrName def'
- = isSet' nsID attrName
- >>^ fromMaybe def'
-
---
-hasAttrValueOf' :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> AttributeValue
- -> XMLConverter nsID extraState x Bool
-hasAttrValueOf' nsID attrName attrValue
- = findAttr nsID attrName
- >>> ( const False ^|||^ (==attrValue))
-
---
-failIfNotAttrValueOf :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> AttributeValue
- -> FallibleXMLConverter nsID extraState x ()
-failIfNotAttrValueOf nsID attrName attrValue
- = hasAttrValueOf' nsID attrName attrValue
- >>^ boolToChoice
-
--- | Is the value that is currently transported in the arrow the value of
--- the specified attribute?
-isThatTheAttrValue :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> FallibleXMLConverter nsID extraState AttributeValue Bool
-isThatTheAttrValue nsID attrName
- = keepingTheValue
- (findAttr nsID attrName)
- >>% right.(==)
-
--- | Lookup value in a dictionary, fail if no attribute found or value
--- not in dictionary
-searchAttrIn :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> [(AttributeValue,a)]
- -> FallibleXMLConverter nsID extraState x a
-searchAttrIn nsID attrName dict
- = findAttr nsID attrName
- >>?^? maybeToChoice.(`lookup` dict )
-
-
--- | Lookup value in a dictionary. Fail if no attribute found. If value not in
--- dictionary, return default value
-searchAttrWith :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> a
- -> [(AttributeValue,a)]
- -> FallibleXMLConverter nsID extraState x a
-searchAttrWith nsID attrName defV dict
- = findAttr nsID attrName
- >>?^ (fromMaybe defV).(`lookup` dict )
-
--- | Lookup value in a dictionary. If attribute or value not found,
--- return default value
-searchAttr :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> a
- -> [(AttributeValue,a)]
- -> XMLConverter nsID extraState x a
-searchAttr nsID attrName defV dict
- = searchAttrIn nsID attrName dict
- >>> const defV ^|||^ id
-
--- | Read a 'Lookupable' attribute. Fail if no match.
-lookupAttr :: (NameSpaceID nsID, Lookupable a)
- => nsID -> AttributeName
- -> FallibleXMLConverter nsID extraState x a
-lookupAttr nsID attrName = lookupAttr' nsID attrName
- >>^ maybeToChoice
-
-
--- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'.
-lookupAttr' :: (NameSpaceID nsID, Lookupable a)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x (Maybe a)
-lookupAttr' nsID attrName
- = findAttr' nsID attrName
- >>^ (>>= readLookupable)
-
--- | Read a 'Lookupable' attribute with explicit default
-lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a)
- => nsID -> AttributeName
- -> a
- -> XMLConverter nsID extraState x a
-lookupAttrWithDefault nsID attrName deflt
- = lookupAttr' nsID attrName
- >>^ fromMaybe deflt
-
--- | Read a 'Lookupable' attribute with implicit default
-lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x a
-lookupDefaultingAttr nsID attrName
- = lookupAttrWithDefault nsID attrName def
-
--- | Return value as a (Maybe String)
-findAttr' :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x (Maybe AttributeValue)
-findAttr' nsID attrName = elemName nsID attrName
- &&& getCurrentElement
- >>% XML.findAttr
-
--- | Return value as string or fail
-findAttr :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> FallibleXMLConverter nsID extraState x AttributeValue
-findAttr nsID attrName = findAttr' nsID attrName
- >>> maybeToChoice
-
--- | Return value as string or return provided default value
-findAttrWithDefault :: (NameSpaceID nsID)
- => nsID -> AttributeName
- -> AttributeValue
- -> XMLConverter nsID extraState x AttributeValue
-findAttrWithDefault nsID attrName deflt
- = findAttr' nsID attrName
- >>^ fromMaybe deflt
-
--- | Read and return value or fail
-readAttr :: (NameSpaceID nsID, Read attrValue)
- => nsID -> AttributeName
- -> FallibleXMLConverter nsID extraState x attrValue
-readAttr nsID attrName = readAttr' nsID attrName
- >>> maybeToChoice
-
--- | Read and return value or return Nothing
-readAttr' :: (NameSpaceID nsID, Read attrValue)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x (Maybe attrValue)
-readAttr' nsID attrName = findAttr' nsID attrName
- >>^ (>>= tryToRead)
-
--- | Read and return value or return provided default value
-readAttrWithDefault :: (NameSpaceID nsID, Read attrValue)
- => nsID -> AttributeName
- -> attrValue
- -> XMLConverter nsID extraState x attrValue
-readAttrWithDefault nsID attrName deflt
- = findAttr' nsID attrName
- >>^ (>>= tryToRead)
- >>^ fromMaybe deflt
-
--- | Read and return value or return default value from 'Default' instance
-getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue)
- => nsID -> AttributeName
- -> XMLConverter nsID extraState x attrValue
-getAttr nsID attrName = readAttrWithDefault nsID attrName def
-
---------------------------------------------------------------------------------
--- Movements
---------------------------------------------------------------------------------
-
---
-jumpThere :: XMLConverter nsID extraState XML.Element XML.Element
-jumpThere = withState (\state element
- -> ( pushElement element state , element )
- )
-
---
-swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element]
-swapStack = withState swapStack'
-
---
-jumpBack :: FallibleXMLConverter nsID extraState _x _x
-jumpBack = tryModifyState (popElement >>> maybeToChoice)
-
--- | Support function for "procedural" converters: jump to an element, execute
--- a converter, jump back.
--- This version is safer than 'executeThere', because it does not rely on the
--- internal stack. As a result, the converter can not move around in arbitrary
--- ways. The downside is of course that some of the environment is not
--- accessible to the converter.
-switchingTheStack :: XMLConverter nsID moreState a b
- -> XMLConverter nsID moreState (a, XML.Element) b
-switchingTheStack a = second ( (:[]) ^>> swapStack )
- >>> first a
- >>> second swapStack
- >>^ fst
-
--- | Support function for "procedural" converters: jumps to an element, executes
--- a converter, jumps back.
--- Make sure that the converter is well-behaved; that is it should
--- return to the exact position it started from in /every possible path/ of
--- execution, even if it "fails". If it does not, you may encounter
--- strange bugs. If you are not sure about the behaviour or want to use
--- shortcuts, you can often use 'switchingTheStack' instead.
-executeThere :: FallibleXMLConverter nsID moreState a b
- -> FallibleXMLConverter nsID moreState (a, XML.Element) b
-executeThere a = second jumpThere
- >>> fst
- ^>> a
- >>> jumpBack -- >>? jumpBack would not ensure the jump.
- >>^ collapseEither
-
--- | Do something in a sub-element, tnen come back
-executeIn :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState f s
- -> FallibleXMLConverter nsID extraState f s
-executeIn nsID name a = keepingTheValue
- (findChild nsID name)
- >>> ignoringState liftFailure
- >>? switchingTheStack a
- where liftFailure (_, (Left f)) = Left f
- liftFailure (x, (Right e)) = Right (x, e)
-
---------------------------------------------------------------------------------
--- Iterating over children
---------------------------------------------------------------------------------
-
--- Helper converter to prepare different types of iterations.
--- It lifts the children (of a certain type) of the current element
--- into the value level and pairs each one with the current input value.
-prepareIteration :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> XMLConverter nsID extraState b [(b, XML.Element)]
-prepareIteration nsID name = keepingTheValue
- (findChildren nsID name)
- >>% distributeValue
-
--- | Applies a converter to every child element of a specific type.
--- Collects results in a 'Monoid'.
--- Fails completely if any conversion fails.
-collectEvery :: (NameSpaceID nsID, Monoid m)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a m
- -> FallibleXMLConverter nsID extraState a m
-collectEvery nsID name a = prepareIteration nsID name
- >>> foldS' (switchingTheStack a)
-
---
-withEveryL :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a b
- -> FallibleXMLConverter nsID extraState a [b]
-withEveryL = withEvery
-
--- | Applies a converter to every child element of a specific type.
--- Collects results in a 'MonadPlus'.
--- Fails completely if any conversion fails.
-withEvery :: (NameSpaceID nsID, MonadPlus m)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a b
- -> FallibleXMLConverter nsID extraState a (m b)
-withEvery nsID name a = prepareIteration nsID name
- >>> iterateS' (switchingTheStack a)
-
--- | Applies a converter to every child element of a specific type.
--- Collects all successful results in a list.
-tryAll :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState b a
- -> XMLConverter nsID extraState b [a]
-tryAll nsID name a = prepareIteration nsID name
- >>> iterateS (switchingTheStack a)
- >>^ collectRights
-
--- | Applies a converter to every child element of a specific type.
--- Collects all successful results.
-tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState b a
- -> XMLConverter nsID extraState b (c a)
-tryAll' nsID name a = prepareIteration nsID name
- >>> iterateS (switchingTheStack a)
- >>^ collectRightsF
-
---------------------------------------------------------------------------------
--- Matching children
---------------------------------------------------------------------------------
-
-type IdXMLConverter nsID moreState x
- = XMLConverter nsID moreState x x
-
-type MaybeEConverter nsID moreState x
- = Maybe (IdXMLConverter nsID moreState (x, XML.Element))
-
--- Chainable converter that helps deciding which converter to actually use.
-type ElementMatchConverter nsID extraState x
- = IdXMLConverter nsID
- extraState
- (MaybeEConverter nsID extraState x, XML.Element)
-
-type MaybeCConverter nsID moreState x
- = Maybe (IdXMLConverter nsID moreState (x, XML.Content))
-
--- Chainable converter that helps deciding which converter to actually use.
-type ContentMatchConverter nsID extraState x
- = IdXMLConverter nsID
- extraState
- (MaybeCConverter nsID extraState x, XML.Content)
-
--- Helper function: The @c@ is actually a converter that is to be selected by
--- matching XML elements to the first two parameters.
--- The fold used to match elements however is very simple, so to use it,
--- this function wraps the converter in another converter that unifies
--- the accumulator. Think of a lot of converters with the resulting type
--- chained together. The accumulator not only transports the element
--- unchanged to the next matcher, it also does the actual selecting by
--- combining the intermediate results with '(<|>)'.
-makeMatcherE :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a a
- -> ElementMatchConverter nsID extraState a
-makeMatcherE nsID name c = ( second (
- elemNameIs nsID name
- >>^ bool Nothing (Just tryC)
- )
- >>% (<|>)
- ) &&&^ snd
- where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd
-
--- Helper function: The @c@ is actually a converter that is to be selected by
--- matching XML content to the first two parameters.
--- The fold used to match elements however is very simple, so to use it,
--- this function wraps the converter in another converter that unifies
--- the accumulator. Think of a lot of converters with the resulting type
--- chained together. The accumulator not only transports the element
--- unchanged to the next matcher, it also does the actual selecting by
--- combining the intermediate results with '(<|>)'.
-makeMatcherC :: (NameSpaceID nsID)
- => nsID -> ElementName
- -> FallibleXMLConverter nsID extraState a a
- -> ContentMatchConverter nsID extraState a
-makeMatcherC nsID name c = ( second ( contentToElem
- >>> returnV Nothing
- ||| ( elemNameIs nsID name
- >>^ bool Nothing (Just cWithJump)
- )
- )
- >>% (<|>)
- ) &&&^ snd
- where cWithJump = ( fst
- ^&&& ( second contentToElem
- >>> spreadChoice
- ^>>? executeThere c
- )
- >>% recover)
- &&&^ snd
- contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element
- contentToElem = arr $ \e -> case e of
- XML.Elem e' -> succeedWith e'
- _ -> failEmpty
-
--- Creates and chains a bunch of matchers
-prepareMatchersE :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
- -> ElementMatchConverter nsID extraState x
---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE)
-prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE)
-
--- Creates and chains a bunch of matchers
-prepareMatchersC :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
- -> ContentMatchConverter nsID extraState x
---prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC)
-prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC)
-
--- | Takes a list of element-data - converter groups and
--- * Finds all children of the current element
--- * Matches each group to each child in order (at most one group per child)
--- * Filters non-matched children
--- * Chains all found converters in child-order
--- * Applies the chain to the input element
-matchChildren :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState a a
-matchChildren lookups = let matcher = prepareMatchersE lookups
- in keepingTheValue (
- elChildren
- >>> map (Nothing,)
- ^>> iterateSL matcher
- >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m)
- -- >>> foldSs
- >>> reverseComposition
- )
- >>> swap
- ^>> app
- where
- -- let the converter swallow the element and drop the element
- -- in the return value
- swallowElem element converter = (,element) ^>> converter >>^ fst
-
---
-matchContent'' :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState a a
-matchContent'' lookups = let matcher = prepareMatchersC lookups
- in keepingTheValue (
- elContent
- >>> map (Nothing,)
- ^>> iterateSL matcher
- >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m)
- -- >>> foldSs
- >>> reverseComposition
- )
- >>> swap
- ^>> app
- where
- -- let the converter swallow the content and drop the content
- -- in the return value
- swallowContent content converter = (,content) ^>> converter >>^ fst
-
-
--- | Takes a list of element-data - converter groups and
--- * Finds all content of the current element
--- * Matches each group to each piece of content in order
--- (at most one group per piece of content)
--- * Filters non-matched content
--- * Chains all found converters in content-order
--- * Applies the chain to the input element
-matchContent' :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState a a
-matchContent' lookups = matchContent lookups (arr fst)
-
--- | Takes a list of element-data - converter groups and
--- * Finds all content of the current element
--- * Matches each group to each piece of content in order
--- (at most one group per piece of content)
--- * Adds a default converter for all non-matched content
--- * Chains all found converters in content-order
--- * Applies the chain to the input element
-matchContent :: (NameSpaceID nsID)
- => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
- -> XMLConverter nsID extraState (a,XML.Content) a
- -> XMLConverter nsID extraState a a
-matchContent lookups fallback
- = let matcher = prepareMatchersC lookups
- in keepingTheValue (
- elContent
- >>> map (Nothing,)
- ^>> iterateSL matcher
- >>^ map swallowOrFallback
- -- >>> foldSs
- >>> reverseComposition
- )
- >>> swap
- ^>> app
- where
- -- let the converter swallow the content and drop the content
- -- in the return value
- swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst
- swallowOrFallback (Nothing ,content) = (,content) ^>> fallback
-
---------------------------------------------------------------------------------
--- Internals
---------------------------------------------------------------------------------
-
-stringToBool :: (Monoid failure) => String -> Either failure Bool
-stringToBool val -- stringToBool' val >>> maybeToChoice
- | val `elem` trueValues = succeedWith True
- | val `elem` falseValues = succeedWith False
- | otherwise = failEmpty
- where trueValues = ["true" ,"on" ,"1"]
- falseValues = ["false","off","0"]
-
-stringToBool' :: String -> Maybe Bool
-stringToBool' val | val `elem` trueValues = Just True
- | val `elem` falseValues = Just False
- | otherwise = Nothing
- where trueValues = ["true" ,"on" ,"1"]
- falseValues = ["false","off","0"]
-
-
-distributeValue :: a -> [b] -> [(a,b)]
-distributeValue = map.(,)
-
---------------------------------------------------------------------------------
-
-{-
-NOTES
-It might be a good idea to refactor the namespace stuff.
-E.g.: if a namespace constructor took a string as a parameter, things like
-> a ?>/< (NsText,"body")
-would be nicer.
-Together with a rename and some trickery, something like
-> |< NsText "body" >< NsText "p" ?> a </> </>|
-might even be possible.
-
-Some day, XML.Light should be replaced by something better.
-While doing that, it might be useful to replace String as the type of element
-names with something else, too. (Of course with OverloadedStrings).
-While doing that, maybe the types can be created in a way that something like
-> NsText:"body"
-could be used. Overloading (:) does not sounds like the best idea, but if the
-element name type was a list, this might be possible.
-Of course that would be a bit hackish, so the "right" way would probably be
-something like
-> InNS NsText "body"
-but isn't that a bit boring? ;)
--}
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
deleted file mode 100644
index deb009998..000000000
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ /dev/null
@@ -1,110 +0,0 @@
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Reader.Odt.Namespaces
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Namespaces used in odt files.
--}
-
-module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
- ) where
-
-import Data.List ( isPrefixOf )
-import Data.Maybe ( fromMaybe, listToMaybe )
-import qualified Data.Map as M ( empty, insert )
-
-import Text.Pandoc.Readers.Odt.Generic.Namespaces
-
-
-instance NameSpaceID Namespace where
-
- getInitialIRImap = nsIDmap
-
- getNamespaceID "" m = Just(m, NsXML)
- getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri)
- where asPair nsID = Just (M.insert nsID iri m, nsID)
-
-
-findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri]
-
-nsIDmap :: NameSpaceIRIs Namespace
-nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
-
-data Namespace = -- Open Document core
- NsOffice | NsStyle | NsText | NsTable | NsForm
- | NsDraw | Ns3D | NsAnim | NsChart | NsConfig
- | NsDB | NsMeta | NsNumber | NsScript | NsManifest
- | NsPresentation
- -- Metadata
- | NsODF
- -- Compatible elements
- | NsXSL_FO | NsSVG | NsSmil
- -- External standards
- | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL
- | NsDublinCore
- -- Metadata manifest
- | NsPKG
- -- Others
- | NsOpenFormula
- -- Core XML (basically only for the 'id'-attribute)
- | NsXML
- -- Fallback
- | NsOther String
- deriving ( Eq, Ord, Show )
-
--- | Not the actual iri's, but large prefixes of them - this way there are
--- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
-nsIDs = [
- ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
- ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
- ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ),
- ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ),
- ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ),
- ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ),
- ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ),
- ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ),
- ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ),
- ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ),
- ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ),
- ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ),
- ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ),
- ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ),
- ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ),
- ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ),
- ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ),
- ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ),
- ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ),
- ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ),
- ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ),
- ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ),
- ("http://purl.org/dc/elements" , NsDublinCore ),
- ("http://www.w3.org/2003/g/data-view" , NsGRDDL ),
- ("http://www.w3.org/1998/Math/MathML" , NsMathML ),
- ("http://www.w3.org/1999/xhtml" , NsXHtml ),
- ("http://www.w3.org/2002/xforms" , NsXForms ),
- ("http://www.w3.org/1999/xlink" , NsXLink )
- ]
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
deleted file mode 100644
index 26ba6df82..000000000
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ /dev/null
@@ -1,744 +0,0 @@
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE Arrows #-}
-
-{-
-Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Odt.StyleReader
- Copyright : Copyright (C) 2015 Martin Linnemann
- License : GNU GPL, version 2 or above
-
- Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Reader for the style information in an odt document.
--}
-
-module Text.Pandoc.Readers.Odt.StyleReader
-( Style (..)
-, StyleName
-, StyleFamily (..)
-, Styles (..)
-, StyleProperties (..)
-, TextProperties (..)
-, ParaProperties (..)
-, VerticalTextPosition (..)
-, ListItemNumberFormat (..)
-, ListLevel
-, ListStyle (..)
-, ListLevelStyle (..)
-, ListLevelType (..)
-, LengthOrPercent (..)
-, lookupStyle
-, getTextProperty
-, getTextProperty'
-, getParaProperty
-, getListStyle
-, getListLevelStyle
-, getStyleFamily
-, lookupDefaultStyle
-, lookupDefaultStyle'
-, lookupListStyleByName
-, getPropertyChain
-, textPropertyChain
-, stylePropertyChain
-, stylePropertyChain'
-, getStylePropertyChain
-, extendedStylePropertyChain
-, extendedStylePropertyChain'
-, liftStyles
-, readStylesAt
-) where
-
-import Control.Arrow
-import Control.Applicative hiding ( liftA, liftA2, liftA3 )
-
-import qualified Data.Foldable as F
-import qualified Data.Map as M
-import qualified Data.Set as S
-import Data.Char ( isDigit )
-import Data.Default
-import Data.List ( unfoldr )
-import Data.Maybe
-
-import qualified Text.XML.Light as XML
-
-import Text.Pandoc.Readers.Odt.Arrows.State
-import Text.Pandoc.Readers.Odt.Arrows.Utils
-
-import Text.Pandoc.Readers.Odt.Generic.Utils
-import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM
-import Text.Pandoc.Readers.Odt.Generic.Fallible
-import Text.Pandoc.Readers.Odt.Generic.XMLConverter
-
-import Text.Pandoc.Readers.Odt.Namespaces
-import Text.Pandoc.Readers.Odt.Base
-
-
-readStylesAt :: XML.Element -> Fallible Styles
-readStylesAt e = runConverter' readAllStyles mempty e
-
---------------------------------------------------------------------------------
--- Reader for font declarations and font pitches
---------------------------------------------------------------------------------
-
--- Pandoc has no support for different font pitches. Yet knowing them can be
--- very helpful in cases where Pandoc has more semantics than OpenDocument.
--- In these cases, the pitch can help deciding as what to define a block of
--- text. So let's start with a type for font pitches:
-
-data FontPitch = PitchVariable | PitchFixed
- deriving ( Eq, Show )
-
-instance Lookupable FontPitch where
- lookupTable = [ ("variable" , PitchVariable)
- , ("fixed" , PitchFixed )
- ]
-
-instance Default FontPitch where
- def = PitchVariable
-
--- The font pitch can be specifed in a style directly. Normally, however,
--- it is defined in the font. That is also the specs' recommendation.
---
--- Thus, we want
-
-type FontFaceName = String
-
-type FontPitches = M.Map FontFaceName FontPitch
-
--- To get there, the fonts have to be read and the pitches extracted.
--- But the resulting map are only needed at one later place, so it should not be
--- transported on the value level, especially as we already use a state arrow.
--- So instead, the resulting map is lifted into the state of the reader.
--- (An alternative might be ImplicitParams, but again, we already have a state.)
---
--- So the main style readers will have the types
-type StyleReader a b = XMLReader FontPitches a b
--- and
-type StyleReaderSafe a b = XMLReaderSafe FontPitches a b
--- respectively.
---
--- But before we can work with these, we need to define the reader that reads
--- the fonts:
-
--- | A reader for font pitches
-fontPitchReader :: XMLReader _s _x FontPitches
-fontPitchReader = executeIn NsOffice "font-face-decls" (
- ( withEveryL NsStyle "font-face" $ liftAsSuccess (
- findAttr' NsStyle "name"
- &&&
- lookupDefaultingAttr NsStyle "font-pitch"
- )
- )
- >>?^ ( M.fromList . (foldl accumLegalPitches []) )
- )
- where accumLegalPitches ls (Nothing,_) = ls
- accumLegalPitches ls (Just n,p) = (n,p):ls
-
-
--- | A wrapper around the font pitch reader that lifts the result into the
--- state.
-readFontPitches :: StyleReader x x
-readFontPitches = producingExtraState () () fontPitchReader
-
-
--- | Looking up a pitch in the state of the arrow.
---
--- The function does the following:
--- * Look for the font pitch in an attribute.
--- * If that fails, look for the font name, look up the font in the state
--- and use the pitch from there.
--- * Return the result in a Maybe
---
-findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch)
-findPitch = ( lookupAttr NsStyle "font-pitch"
- `ifFailedDo` findAttr NsStyle "font-name"
- >>? ( keepingTheValue getExtraState
- >>% M.lookup
- >>^ maybeToChoice
- )
- )
- >>> choiceToMaybe
-
---------------------------------------------------------------------------------
--- Definitions of main data
---------------------------------------------------------------------------------
-
-type StyleName = String
-
--- | There are two types of styles: named styles with a style family and an
--- optional style parent, and default styles for each style family,
--- defining default style properties
-data Styles = Styles
- { stylesByName :: M.Map StyleName Style
- , listStylesByName :: M.Map StyleName ListStyle
- , defaultStyleMap :: M.Map StyleFamily StyleProperties
- }
- deriving ( Show )
-
--- Styles from a monoid under union
-instance Monoid Styles where
- mempty = Styles M.empty M.empty M.empty
- mappend (Styles sBn1 dSm1 lsBn1)
- (Styles sBn2 dSm2 lsBn2)
- = Styles (M.union sBn1 sBn2)
- (M.union dSm1 dSm2)
- (M.union lsBn1 lsBn2)
-
--- Not all families from the specifications are implemented, only those we need.
--- But there are none that are not mentioned here.
-data StyleFamily = FaText | FaParagraph
--- | FaTable | FaTableCell | FaTableColumn | FaTableRow
--- | FaGraphic | FaDrawing | FaChart
--- | FaPresentation
--- | FaRuby
- deriving ( Eq, Ord, Show )
-
-instance Lookupable StyleFamily where
- lookupTable = [ ( "text" , FaText )
- , ( "paragraph" , FaParagraph )
--- , ( "table" , FaTable )
--- , ( "table-cell" , FaTableCell )
--- , ( "table-column" , FaTableColumn )
--- , ( "table-row" , FaTableRow )
--- , ( "graphic" , FaGraphic )
--- , ( "drawing-page" , FaDrawing )
--- , ( "chart" , FaChart )
--- , ( "presentation" , FaPresentation )
--- , ( "ruby" , FaRuby )
- ]
-
--- | A named style
-data Style = Style { styleFamily :: Maybe StyleFamily
- , styleParentName :: Maybe StyleName
- , listStyle :: Maybe StyleName
- , styleProperties :: StyleProperties
- }
- deriving ( Eq, Show )
-
-data StyleProperties = SProps { textProperties :: Maybe TextProperties
- , paraProperties :: Maybe ParaProperties
--- , tableColProperties :: Maybe TColProperties
--- , tableRowProperties :: Maybe TRowProperties
--- , tableCellProperties :: Maybe TCellProperties
--- , tableProperties :: Maybe TableProperties
--- , graphicProperties :: Maybe GraphProperties
- }
- deriving ( Eq, Show )
-
-instance Default StyleProperties where
- def = SProps { textProperties = Just def
- , paraProperties = Just def
- }
-
-data TextProperties = PropT { isEmphasised :: Bool
- , isStrong :: Bool
- , pitch :: Maybe FontPitch
- , verticalPosition :: VerticalTextPosition
- , underline :: Maybe UnderlineMode
- , strikethrough :: Maybe UnderlineMode
- }
- deriving ( Eq, Show )
-
-instance Default TextProperties where
- def = PropT { isEmphasised = False
- , isStrong = False
- , pitch = Just def
- , verticalPosition = def
- , underline = Nothing
- , strikethrough = Nothing
- }
-
-data ParaProperties = PropP { paraNumbering :: ParaNumbering
- , indentation :: LengthOrPercent
- , margin_left :: LengthOrPercent
- }
- deriving ( Eq, Show )
-
-instance Default ParaProperties where
- def = PropP { paraNumbering = NumberingNone
- , indentation = def
- , margin_left = def
- }
-
-----
--- All the little data types that make up the properties
-----
-
-data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub
- deriving ( Eq, Show )
-
-instance Default VerticalTextPosition where
- def = VPosNormal
-
-instance Read VerticalTextPosition where
- readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ]
- ++ [ (VPosSuper , s') | ("super" , s') <- lexS ]
- ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ]
- where
- lexS = lex s
- signumToVPos n | n < 0 = VPosSub
- | n > 0 = VPosSuper
- | otherwise = VPosNormal
-
-data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace
- deriving ( Eq, Show )
-
-instance Lookupable UnderlineMode where
- lookupTable = [ ( "continuous" , UnderlineModeNormal )
- , ( "skip-white-space" , UnderlineModeSkipWhitespace )
- ]
-
-
-data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int
- deriving ( Eq, Show )
-
-data LengthOrPercent = LengthValueMM Int | PercentValue Int
- deriving ( Eq, Show )
-
-instance Default LengthOrPercent where
- def = LengthValueMM 0
-
-instance Read LengthOrPercent where
- readsPrec _ s =
- [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s]
- ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s
- , (unit , s'') <- reads s'
- , let lengthMM = estimateInMillimeter
- length' unit
- ]
-
-data XslUnit = XslUnitMM | XslUnitCM
- | XslUnitInch
- | XslUnitPoints | XslUnitPica
- | XslUnitPixel
- | XslUnitEM
-
-instance Show XslUnit where
- show XslUnitMM = "mm"
- show XslUnitCM = "cm"
- show XslUnitInch = "in"
- show XslUnitPoints = "pt"
- show XslUnitPica = "pc"
- show XslUnitPixel = "px"
- show XslUnitEM = "em"
-
-instance Read XslUnit where
- readsPrec _ "mm" = [(XslUnitMM , "")]
- readsPrec _ "cm" = [(XslUnitCM , "")]
- readsPrec _ "in" = [(XslUnitInch , "")]
- readsPrec _ "pt" = [(XslUnitPoints , "")]
- readsPrec _ "pc" = [(XslUnitPica , "")]
- readsPrec _ "px" = [(XslUnitPixel , "")]
- readsPrec _ "em" = [(XslUnitEM , "")]
- readsPrec _ _ = []
-
--- | Rough conversion of measures into millimeters.
--- Pixels and em's are actually implemetation dependant/relative measures,
--- so I could not really easily calculate anything exact here even if I wanted.
--- But I do not care about exactness right now, as I only use measures
--- to determine if a paragraph is "indented" or not.
-estimateInMillimeter :: Int -> XslUnit -> Int
-estimateInMillimeter n XslUnitMM = n
-estimateInMillimeter n XslUnitCM = n * 10
-estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4
-estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4
-estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4
-estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4
-estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4
-
-
-----
--- List styles
-----
-
-type ListLevel = Int
-
-newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle
- }
- deriving ( Eq, Show )
-
---
-getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle
-getListLevelStyle level ListStyle{..} =
- let (lower , exactHit , _) = M.splitLookup level levelStyles
- in exactHit <|> fmap fst (M.maxView lower)
- -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1]
- -- \^ simpler, but in general less efficient
-
-data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
- , listItemPrefix :: Maybe String
- , listItemSuffix :: Maybe String
- , listItemFormat :: ListItemNumberFormat
- , listItemStart :: Int
- }
- deriving ( Eq, Ord )
-
-instance Show ListLevelStyle where
- show ListLevelStyle{..} = "<LLS|"
- ++ (show listLevelType)
- ++ "|"
- ++ (maybeToString listItemPrefix)
- ++ (show listItemFormat)
- ++ (maybeToString listItemSuffix)
- ++ ">"
- where maybeToString = fromMaybe ""
-
-data ListLevelType = LltBullet | LltImage | LltNumbered
- deriving ( Eq, Ord, Show )
-
-data ListItemNumberFormat = LinfNone
- | LinfNumber
- | LinfRomanLC | LinfRomanUC
- | LinfAlphaLC | LinfAlphaUC
- | LinfString String
- deriving ( Eq, Ord )
-
-instance Show ListItemNumberFormat where
- show LinfNone = ""
- show LinfNumber = "1"
- show LinfRomanLC = "i"
- show LinfRomanUC = "I"
- show LinfAlphaLC = "a"
- show LinfAlphaUC = "A"
- show (LinfString s) = s
-
-instance Default ListItemNumberFormat where
- def = LinfNone
-
-instance Read ListItemNumberFormat where
- readsPrec _ "" = [(LinfNone , "")]
- readsPrec _ "1" = [(LinfNumber , "")]
- readsPrec _ "i" = [(LinfRomanLC , "")]
- readsPrec _ "I" = [(LinfRomanUC , "")]
- readsPrec _ "a" = [(LinfAlphaLC , "")]
- readsPrec _ "A" = [(LinfAlphaUC , "")]
- readsPrec _ s = [(LinfString s , "")]
-
---------------------------------------------------------------------------------
--- Readers
---
--- ...it seems like a whole lot of this should be automatically deriveable
--- or at least moveable into a class. Most of this is data concealed in
--- code.
---------------------------------------------------------------------------------
-
---
-readAllStyles :: StyleReader _x Styles
-readAllStyles = ( readFontPitches
- >>?! ( readAutomaticStyles
- &&& readStyles ))
- >>?%? chooseMax
- -- all top elements are always on the same hierarchy level
-
---
-readStyles :: StyleReader _x Styles
-readStyles = executeIn NsOffice "styles" $ liftAsSuccess
- $ liftA3 Styles
- ( tryAll NsStyle "style" readStyle >>^ M.fromList )
- ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
- ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList )
-
---
-readAutomaticStyles :: StyleReader _x Styles
-readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess
- $ liftA3 Styles
- ( tryAll NsStyle "style" readStyle >>^ M.fromList )
- ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
- ( returnV M.empty )
-
---
-readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties)
-readDefaultStyle = lookupAttr NsStyle "family"
- >>?! keepingTheValue readStyleProperties
-
---
-readStyle :: StyleReader _x (StyleName,Style)
-readStyle = findAttr NsStyle "name"
- >>?! keepingTheValue
- ( liftA4 Style
- ( lookupAttr' NsStyle "family" )
- ( findAttr' NsStyle "parent-style-name" )
- ( findAttr' NsStyle "list-style-name" )
- readStyleProperties
- )
-
---
-readStyleProperties :: StyleReaderSafe _x StyleProperties
-readStyleProperties = liftA2 SProps
- ( readTextProperties >>> choiceToMaybe )
- ( readParaProperties >>> choiceToMaybe )
-
---
-readTextProperties :: StyleReader _x TextProperties
-readTextProperties =
- executeIn NsStyle "text-properties" $ liftAsSuccess
- ( liftA6 PropT
- ( searchAttr NsXSL_FO "font-style" False isFontEmphasised )
- ( searchAttr NsXSL_FO "font-weight" False isFontBold )
- ( findPitch )
- ( getAttr NsStyle "text-position" )
- ( readUnderlineMode )
- ( readStrikeThroughMode )
- )
- where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
- isFontBold = ("normal",False):("bold",True)
- :(map ((,True).show) ([100,200..900]::[Int]))
-
-readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
-readUnderlineMode = readLineMode "text-underline-mode"
- "text-underline-style"
-
-readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
-readStrikeThroughMode = readLineMode "text-line-through-mode"
- "text-line-through-style"
-
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
-readLineMode modeAttr styleAttr = proc x -> do
- isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
- mode <- lookupAttr' NsStyle modeAttr -< x
- if isUL
- then case mode of
- Just m -> returnA -< Just m
- Nothing -> returnA -< Just UnderlineModeNormal
- else returnA -< Nothing
- where
- isLinePresent = [("none",False)] ++ map (,True)
- [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted"
- , "long-dash" , "solid" , "wave"
- ]
-
---
-readParaProperties :: StyleReader _x ParaProperties
-readParaProperties =
- executeIn NsStyle "paragraph-properties" $ liftAsSuccess
- ( liftA3 PropP
- ( liftA2 readNumbering
- ( isSet' NsText "number-lines" )
- ( readAttr' NsText "line-number" )
- )
- ( liftA2 readIndentation
- ( isSetWithDefault NsStyle "auto-text-indent" False )
- ( getAttr NsXSL_FO "text-indent" )
- )
- ( getAttr NsXSL_FO "margin-left" )
- )
- where readNumbering (Just True) (Just n) = NumberingRestart n
- readNumbering (Just True) _ = NumberingKeep
- readNumbering _ _ = NumberingNone
-
- readIndentation False indent = indent
- readIndentation True _ = def
-
-----
--- List styles
-----
-
---
-readListStyle :: StyleReader _x (StyleName, ListStyle)
-readListStyle =
- findAttr NsStyle "name"
- >>?! keepingTheValue
- ( liftA ListStyle
- $ ( liftA3 SM.union3
- ( readListLevelStyles NsText "list-level-style-number" LltNumbered )
- ( readListLevelStyles NsText "list-level-style-bullet" LltBullet )
- ( readListLevelStyles NsText "list-level-style-image" LltImage )
- ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle
- )
---
-readListLevelStyles :: Namespace -> ElementName
- -> ListLevelType
- -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle)
-readListLevelStyles namespace elementName levelType =
- ( tryAll namespace elementName (readListLevelStyle levelType)
- >>^ SM.fromList
- )
-
---
-readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
-readListLevelStyle levelType = readAttr NsText "level"
- >>?! keepingTheValue
- ( liftA5 toListLevelStyle
- ( returnV levelType )
- ( findAttr' NsStyle "num-prefix" )
- ( findAttr' NsStyle "num-suffix" )
- ( getAttr NsStyle "num-format" )
- ( findAttr' NsText "start-value" )
- )
- where
- toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)
- toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b)
- toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b)
- startValue (Just "") = 1
- startValue (Just v) = if all isDigit v
- then read v
- else 1
- startValue Nothing = 1
-
---
-chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
- | otherwise = Just ( F.foldr1 select ls )
- where
- select ( ListLevelStyle t1 p1 s1 f1 b1 )
- ( ListLevelStyle t2 p2 s2 f2 _ )
- = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
- select' LltNumbered _ = LltNumbered
- select' _ LltNumbered = LltNumbered
- select' _ _ = LltBullet
- selectLinf LinfNone f2 = f2
- selectLinf f1 LinfNone = f1
- selectLinf (LinfString _) f2 = f2
- selectLinf f1 (LinfString _) = f1
- selectLinf f1 _ = f1
-
-
---------------------------------------------------------------------------------
--- Tools to access style data
---------------------------------------------------------------------------------
-
---
-lookupStyle :: StyleName -> Styles -> Maybe Style
-lookupStyle name Styles{..} = M.lookup name stylesByName
-
---
-lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties
-lookupDefaultStyle family Styles{..} = fromMaybe def
- (M.lookup family defaultStyleMap)
-
---
-lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties
-lookupDefaultStyle' Styles{..} family = fromMaybe def
- (M.lookup family defaultStyleMap)
-
---
-getListStyle :: Style -> Styles -> Maybe ListStyle
-getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles)
-
---
-lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle
-lookupListStyleByName name Styles{..} = M.lookup name listStylesByName
-
-
--- | Returns a chain of parent of the current style. The direct parent will
--- be the first element of the list, followed by its parent and so on.
--- The current style is not in the list.
-parents :: Style -> Styles -> [Style]
-parents style styles = unfoldr findNextParent style -- Ha!
- where findNextParent Style{..}
- = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName
-
--- | Looks up the style family of the current style. Normally, every style
--- should have one. But if not, all parents are searched.
-getStyleFamily :: Style -> Styles -> Maybe StyleFamily
-getStyleFamily style@Style{..} styles
- = styleFamily
- <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles)
-
--- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property
--- values are specified. Instead, a value might be inherited from a
--- parent style. This function makes this chain of inheritance
--- concrete and easily accessible by encapsulating the necessary lookups.
--- The resulting list contains the direct properties of the style as the first
--- element, the ones of the direct parent element as the next one, and so on.
---
--- Note: There should also be default properties for each style family. These
--- are @not@ contained in this list because properties inherited from
--- parent elements take precedence over default styles.
---
--- This function is primarily meant to be used through convenience wrappers.
---
-stylePropertyChain :: Style -> Styles -> [StyleProperties]
-stylePropertyChain style styles
- = map styleProperties (style : parents style styles)
-
---
-extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties]
-extendedStylePropertyChain [] _ = []
-extendedStylePropertyChain [style] styles = (stylePropertyChain style styles)
- ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
-extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles)
- ++ (extendedStylePropertyChain trace styles)
--- Optimizable with Data.Sequence
-
---
-extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties]
-extendedStylePropertyChain' [] _ = Nothing
-extendedStylePropertyChain' [style] styles = Just (
- (stylePropertyChain style styles)
- ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
- )
-extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++)
- (extendedStylePropertyChain' trace styles)
-
---
-stylePropertyChain' :: Styles -> Style -> [StyleProperties]
-stylePropertyChain' = flip stylePropertyChain
-
---
-getStylePropertyChain :: StyleName -> Styles -> [StyleProperties]
-getStylePropertyChain name styles = maybe []
- (`stylePropertyChain` styles)
- (lookupStyle name styles)
-
---
-getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a]
-getPropertyChain extract style styles = catMaybes
- $ map extract
- $ stylePropertyChain style styles
-
---
-textPropertyChain :: Style -> Styles -> [TextProperties]
-textPropertyChain = getPropertyChain textProperties
-
---
-paraPropertyChain :: Style -> Styles -> [ParaProperties]
-paraPropertyChain = getPropertyChain paraProperties
-
---
-getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a
-getTextProperty extract style styles = fmap extract
- $ listToMaybe
- $ textPropertyChain style styles
-
---
-getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a
-getTextProperty' extract style styles = F.asum
- $ map extract
- $ textPropertyChain style styles
-
---
-getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a
-getParaProperty extract style styles = fmap extract
- $ listToMaybe
- $ paraPropertyChain style styles
-
--- | Lifts the reader into another readers' state.
-liftStyles :: (OdtConverterState s -> OdtConverterState Styles)
- -> (OdtConverterState Styles -> OdtConverterState s )
- -> XMLReader s x x
-liftStyles extract inject = switchState extract inject
- $ convertingExtraState M.empty readAllStyles
-
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
deleted file mode 100644
index c8dbbf45a..000000000
--- a/src/Text/Pandoc/Readers/Org.hs
+++ /dev/null
@@ -1,62 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Conversion of org-mode formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Org ( readOrg ) where
-
-import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
-import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
-import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState )
-
-import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import Text.Pandoc.Options
-
-import Control.Monad.Except ( throwError )
-import Control.Monad.Reader ( runReaderT )
-
-
--- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readOrg opts s = do
- parsed <- flip runReaderT def $
- readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
- case parsed of
- Right result -> return result
- Left _ -> throwError $ PandocParseError "problem parsing org"
-
---
--- Parser
---
-parseOrg :: PandocMonad m => OrgParser m Pandoc
-parseOrg = do
- blocks' <- blockList
- meta' <- meta
- return $ Pandoc meta' blocks'
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
deleted file mode 100644
index 5588c4552..000000000
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ /dev/null
@@ -1,137 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode inline elements.
--}
-module Text.Pandoc.Readers.Org.BlockStarts
- ( exampleLineStart
- , hline
- , noteMarker
- , tableStart
- , drawerStart
- , headerStart
- , metaLineStart
- , latexEnvStart
- , commentLineStart
- , bulletListStart
- , orderedListStart
- , endOfBlock
- ) where
-
-import Control.Monad ( void )
-import Text.Pandoc.Readers.Org.Parsing
-
--- | Horizontal Line (five -- dashes or more)
-hline :: Monad m => OrgParser m ()
-hline = try $ do
- skipSpaces
- string "-----"
- many (char '-')
- skipSpaces
- newline
- return ()
-
--- | Read the start of a header line, return the header level
-headerStart :: Monad m => OrgParser m Int
-headerStart = try $
- (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-
-tableStart :: Monad m => OrgParser m Char
-tableStart = try $ skipSpaces *> char '|'
-
-latexEnvStart :: Monad m => OrgParser m String
-latexEnvStart = try $ do
- skipSpaces *> string "\\begin{"
- *> latexEnvName
- <* string "}"
- <* blankline
- where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
-
-
--- | Parses bullet list marker.
-bulletListStart :: Monad m => OrgParser m ()
-bulletListStart = try $
- choice
- [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
- , () <$ skipSpaces1 <* char '*' <* skipSpaces1
- ]
-
-genericListStart :: Monad m
- => OrgParser m String
- -> OrgParser m Int
-genericListStart listMarker = try $
- (+) <$> (length <$> many spaceChar)
- <*> (length <$> listMarker <* many1 spaceChar)
-
-orderedListStart :: Monad m => OrgParser m Int
-orderedListStart = genericListStart orderedListMarker
- -- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
-
-drawerStart :: Monad m => OrgParser m String
-drawerStart = try $
- skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
-
-metaLineStart :: Monad m => OrgParser m ()
-metaLineStart = try $ skipSpaces <* string "#+"
-
-commentLineStart :: Monad m => OrgParser m ()
-commentLineStart = try $ skipSpaces <* string "# "
-
-exampleLineStart :: Monad m => OrgParser m ()
-exampleLineStart = () <$ try (skipSpaces *> string ": ")
-
-noteMarker :: Monad m => OrgParser m String
-noteMarker = try $ do
- char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
- ]
-
--- | Succeeds if the parser is at the end of a block.
-endOfBlock :: Monad m => OrgParser m ()
-endOfBlock = lookAhead . try $ do
- void blankline <|> anyBlockStart
- where
- -- Succeeds if there is a new block starting at this position.
- anyBlockStart :: Monad m => OrgParser m ()
- anyBlockStart = try . choice $
- [ exampleLineStart
- , hline
- , metaLineStart
- , commentLineStart
- , void noteMarker
- , void tableStart
- , void drawerStart
- , void headerStart
- , void latexEnvStart
- , void bulletListStart
- , void orderedListStart
- ]
-
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
deleted file mode 100644
index 78ac8d0d1..000000000
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ /dev/null
@@ -1,979 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE ViewPatterns #-}
-{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode block elements.
--}
-module Text.Pandoc.Readers.Org.Blocks
- ( blockList
- , meta
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks )
-import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead )
-
-import Control.Monad ( foldM, guard, mzero, void )
-import Data.Char ( isSpace, toLower, toUpper)
-import Data.Default ( Default )
-import Data.List ( foldl', isPrefixOf )
-import Data.Maybe ( fromMaybe, isNothing )
-import Data.Monoid ((<>))
-
---
--- Org headers
---
-newtype Tag = Tag { fromTag :: String }
- deriving (Show, Eq)
-
--- | Create a tag containing the given string.
-toTag :: String -> Tag
-toTag = Tag
-
--- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
- deriving (Show, Eq, Ord)
-
--- | Create a property key containing the given string. Org mode keys are
--- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
-
--- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
-
--- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
-toPropertyValue = PropertyValue
-
--- | Check whether the property value is non-nil (i.e. truish).
-isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-
--- | Key/value pairs from a PROPERTIES drawer
-type Properties = [(PropertyKey, PropertyValue)]
-
--- | Org mode headline (i.e. a document subtree).
-data Headline = Headline
- { headlineLevel :: Int
- , headlineTodoMarker :: Maybe TodoMarker
- , headlineText :: Inlines
- , headlineTags :: [Tag]
- , headlineProperties :: Properties
- , headlineContents :: Blocks
- , headlineChildren :: [Headline]
- }
-
---
--- Parsing headlines and subtrees
---
-
--- | Read an Org mode headline and its contents (i.e. a document subtree).
--- @lvl@ gives the minimum acceptable level of the tree.
-headline :: PandocMonad m => Int -> OrgParser m (F Headline)
-headline lvl = try $ do
- level <- headerStart
- guard (lvl <= level)
- todoKw <- optionMaybe todoKeyword
- title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
- tags <- option [] headerTags
- newline
- properties <- option mempty propertiesDrawer
- contents <- blocks
- children <- many (headline (level + 1))
- return $ do
- title' <- title
- contents' <- contents
- children' <- sequence children
- return $ Headline
- { headlineLevel = level
- , headlineTodoMarker = todoKw
- , headlineText = title'
- , headlineTags = tags
- , headlineProperties = properties
- , headlineContents = contents'
- , headlineChildren = children'
- }
- where
- endOfTitle :: Monad m => OrgParser m ()
- endOfTitle = void . lookAhead $ optional headerTags *> newline
-
- headerTags :: Monad m => OrgParser m [Tag]
- headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
- in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-
--- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
-headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-headlineToBlocks hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- case () of
- _ | any isNoExportTag headlineTags -> return mempty
- _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
- _ | isCommentTitle headlineText -> return mempty
- _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
- _ | otherwise -> headlineToHeaderWithContents hdln
-
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
-
-isArchiveTag :: Tag -> Bool
-isArchiveTag = (== toTag "ARCHIVE")
-
--- | Check if the title starts with COMMENT.
--- FIXME: This accesses builder internals not intended for use in situations
--- like these. Replace once keyword parsing is supported.
-isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _ = False
-
-archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-archivedHeadlineToBlocks hdln = do
- archivedTreesOption <- getExportSetting exportArchivedTrees
- case archivedTreesOption of
- ArchivedTreesNoExport -> return mempty
- ArchivedTreesExport -> headlineToHeaderWithContents hdln
- ArchivedTreesHeadlineOnly -> headlineToHeader hdln
-
-headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithList hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- header <- headlineToHeader hdln
- listElements <- sequence (map headlineToBlocks headlineChildren)
- let listBlock = if null listElements
- then mempty
- else B.orderedList listElements
- let headerText = if maxHeadlineLevels == headlineLevel
- then header
- else flattenHeader header
- return $ headerText <> headlineContents <> listBlock
- where
- flattenHeader :: Blocks -> Blocks
- flattenHeader blks =
- case B.toList blks of
- (Header _ _ inlns:_) -> B.para (B.fromList inlns)
- _ -> mempty
-
-headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithContents hdln@(Headline {..}) = do
- header <- headlineToHeader hdln
- childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
- return $ header <> headlineContents <> childrenBlocks
-
-headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeader (Headline {..}) = do
- exportTodoKeyword <- getExportSetting exportWithTodoKeywords
- let todoText = if exportTodoKeyword
- then case headlineTodoMarker of
- Just kw -> todoKeywordToInlines kw <> B.space
- Nothing -> mempty
- else mempty
- let text = tagTitle (todoText <> headlineText) headlineTags
- let propAttr = propertiesToAttr headlineProperties
- attr <- registerHeader propAttr headlineText
- return $ B.headerWith attr headlineLevel text
-
-todoKeyword :: Monad m => OrgParser m TodoMarker
-todoKeyword = try $ do
- taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
- choice (map kwParser taskStates)
-
-todoKeywordToInlines :: TodoMarker -> Inlines
-todoKeywordToInlines tdm =
- let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
- classes = [todoState, todoText]
- in B.spanWith (mempty, classes, mempty) (B.str todoText)
-
-propertiesToAttr :: Properties -> Attr
-propertiesToAttr properties =
- let
- toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
- customIdKey = toPropertyKey "custom_id"
- classKey = toPropertyKey "class"
- unnumberedKey = toPropertyKey "unnumbered"
- specialProperties = [customIdKey, classKey, unnumberedKey]
- id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
- cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
- $ properties
- isUnnumbered =
- fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties
- in
- (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs')
-
-tagTitle :: Inlines -> [Tag] -> Inlines
-tagTitle title tags = title <> (mconcat $ map tagToInline tags)
-
-tagToInline :: Tag -> Inlines
-tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
-
-
---
--- parsing blocks
---
-
--- | Get a list of blocks.
-blockList :: PandocMonad m => OrgParser m [Block]
-blockList = do
- initialBlocks <- blocks
- headlines <- sequence <$> manyTill (headline 1) eof
- st <- getState
- headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st
- return . B.toList $ (runF initialBlocks st) <> headlineBlocks
-
--- | Get the meta information safed in the state.
-meta :: Monad m => OrgParser m Meta
-meta = do
- meta' <- metaExport
- runF meta' <$> getState
-
-blocks :: PandocMonad m => OrgParser m (F Blocks)
-blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
-
-block :: PandocMonad m => OrgParser m (F Blocks)
-block = choice [ mempty <$ blanklines
- , table
- , orgBlock
- , figure
- , example
- , genericDrawer
- , specialLine
- , horizontalRule
- , list
- , latexFragment
- , noteBlock
- , paraOrPlain
- ] <?> "block"
-
-
---
--- Block Attributes
---
-
--- | Attributes that may be added to figures (like a name or caption).
-data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
- , blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
- }
-
--- | Convert BlockAttributes into pandoc Attr
-attrFromBlockAttributes :: BlockAttributes -> Attr
-attrFromBlockAttributes (BlockAttributes{..}) =
- let
- ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
- classes = case lookup "class" blockAttrKeyValues of
- Nothing -> []
- Just clsStr -> words clsStr
- kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
- in (ident, classes, kv)
-
-stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String)
-stringyMetaAttribute attrCheck = try $ do
- metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
- guard $ attrCheck attrName
- skipSpaces
- attrValue <- anyLine
- return (attrName, attrValue)
-
-blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
-blockAttributes = try $ do
- kv <- many (stringyMetaAttribute attrCheck)
- let caption = foldl' (appendValues "CAPTION") Nothing kv
- let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
- let name = lookup "NAME" kv
- let label = lookup "LABEL" kv
- caption' <- case caption of
- Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
- return $ BlockAttributes
- { blockAttrName = name
- , blockAttrLabel = label
- , blockAttrCaption = caption'
- , blockAttrKeyValues = kvAttrs'
- }
- where
- attrCheck :: String -> Bool
- attrCheck attr =
- case attr of
- "NAME" -> True
- "LABEL" -> True
- "CAPTION" -> True
- "ATTR_HTML" -> True
- _ -> False
-
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
- appendValues attrName accValue (key, value) =
- if key /= attrName
- then accValue
- else case accValue of
- Just acc -> Just $ acc ++ ' ':value
- Nothing -> Just value
-
-keyValues :: Monad m => OrgParser m [(String, String)]
-keyValues = try $
- manyTill ((,) <$> key <*> value) newline
- where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
-
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
-
- endOfValue :: Monad m => OrgParser m ()
- endOfValue =
- lookAhead $ (() <$ try (many1 spaceChar <* key))
- <|> () <$ newline
-
-
---
--- Org Blocks (#+BEGIN_... / #+END_...)
---
-
--- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
-orgBlock :: PandocMonad m => OrgParser m (F Blocks)
-orgBlock = try $ do
- blockAttrs <- blockAttributes
- blkType <- blockHeaderStart
- ($ blkType) $
- case (map toLower blkType) of
- "export" -> exportBlock
- "comment" -> rawBlockLines (const mempty)
- "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "example" -> rawBlockLines (return . exampleCode)
- "quote" -> parseBlockLines (fmap B.blockQuote)
- "verse" -> verseBlock
- "src" -> codeBlock blockAttrs
- _ -> parseBlockLines $
- let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
- in fmap $ B.divWith (ident, classes ++ [blkType], kv)
- where
- blockHeaderStart :: Monad m => OrgParser m String
- blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
-
- lowercase :: String -> String
- lowercase = map toLower
-
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
-rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
-
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
-parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
- where
- parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
- parsedBlockContent = try $ do
- raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
-
--- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
-rawBlockContent blockType = try $ do
- blkLines <- manyTill rawLine blockEnder
- tabLen <- getOption readerTabStop
- return
- . unlines
- . stripIndent
- . map (tabsToSpaces tabLen . commaEscaped)
- $ blkLines
- where
- rawLine :: Monad m => OrgParser m String
- rawLine = try $ ("" <$ blankline) <|> anyLine
-
- blockEnder :: Monad m => OrgParser m ()
- blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
-
- stripIndent :: [String] -> [String]
- stripIndent strs = map (drop (shortestIndent strs)) strs
-
- shortestIndent :: [String] -> Int
- shortestIndent = foldr min maxBound
- . map (length . takeWhile isSpace)
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
-
--- | Read but ignore all remaining block headers.
-ignHeaders :: Monad m => OrgParser m ()
-ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-
--- | Read a block containing code intended for export in specific backends
--- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
-exportBlock blockType = try $ do
- exportType <- skipSpaces *> orgArgWord <* ignHeaders
- contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
-
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
-verseBlock blockType = try $ do
- ignHeaders
- content <- rawBlockContent blockType
- fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
- where
- -- replace initial spaces with nonbreaking spaces to preserve
- -- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
- parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
- then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
- return (trimInlinesF $ pure nbspIndent <> line)
-
--- | Read a code block and the associated results block if present. Which of
--- boths blocks is included in the output is determined using the "exports"
--- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
-codeBlock blockAttrs blockType = do
- skipSpaces
- (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- content <- rawBlockContent blockType
- resultsContent <- trailingResultsBlock
- let id' = fromMaybe mempty $ blockAttrName blockAttrs
- let includeCode = exportsCode kv
- let includeResults = exportsResults kv
- let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- let labelledBlck = maybe (pure codeBlck)
- (labelDiv codeBlck)
- (blockAttrCaption blockAttrs)
- let resultBlck = fromMaybe mempty resultsContent
- return $
- (if includeCode then labelledBlck else mempty) <>
- (if includeResults then resultBlck else mempty)
- where
- labelDiv :: Blocks -> F Inlines -> F Blocks
- labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
-
- labelledBlock :: F Inlines -> F Blocks
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
- || ("rundoc-exports", "results") `elem` attrs)
-
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
- || ("rundoc-exports", "both") `elem` attrs
-
-trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks))
-trailingResultsBlock = optionMaybe . try $ do
- blanklines
- stringAnyCase "#+RESULTS:"
- blankline
- block
-
--- | Parse code block arguments
--- TODO: We currently don't handle switches.
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
-codeHeaderArgs = try $ do
- language <- skipSpaces *> orgArgWord
- _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
- parameters <- manyTill blockOption newline
- let pandocLang = translateLang language
- return $
- if hasRundocParameters parameters
- then ( [ pandocLang, rundocBlockClass ]
- , map toRundocAttrib (("language", language) : parameters)
- )
- else ([ pandocLang ], parameters)
- where
- hasRundocParameters = not . null
-
-switch :: Monad m => OrgParser m (Char, Maybe String)
-switch = try $ simpleSwitch <|> lineNumbersSwitch
- where
- simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
- lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
- (string "-l \"" *> many1Till nonspaceChar (char '"'))
-
-blockOption :: Monad m => OrgParser m (String, String)
-blockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgParamValue
- return (argKey, paramValue)
-
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':' )
- *> many1 nonspaceChar
- <* skipSpaces
-
-horizontalRule :: Monad m => OrgParser m (F Blocks)
-horizontalRule = return B.horizontalRule <$ try hline
-
-
---
--- Drawers
---
-
--- | A generic drawer which has no special meaning for org-mode.
--- Whether or not this drawer is included in the output depends on the drawers
--- export setting.
-genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
-genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
- content <- manyTill drawerLine (try drawerEnd)
- state <- getState
- -- Include drawer if it is explicitly included in or not explicitly excluded
- -- from the list of drawers that should be exported. PROPERTIES drawers are
- -- never exported.
- case (exportDrawers . orgStateExportSettings $ state) of
- _ | name == "PROPERTIES" -> return mempty
- Left names | name `elem` names -> return mempty
- Right names | name `notElem` names -> return mempty
- _ -> drawerDiv name <$> parseLines content
- where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
-
- drawerDiv :: String -> F Blocks -> F Blocks
- drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-
-drawerLine :: Monad m => OrgParser m String
-drawerLine = anyLine
-
-drawerEnd :: Monad m => OrgParser m String
-drawerEnd = try $
- skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
-
--- | Read a :PROPERTIES: drawer and return the key/value pairs contained
--- within.
-propertiesDrawer :: Monad m => OrgParser m Properties
-propertiesDrawer = try $ do
- drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
- manyTill property (try drawerEnd)
- where
- property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
- property = try $ (,) <$> key <*> value
-
- key :: Monad m => OrgParser m PropertyKey
- key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
-
- value :: Monad m => OrgParser m PropertyValue
- value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
-
-
---
--- Figures
---
-
--- | Figures or an image paragraph (i.e. an image on a line by itself). Only
--- images with a caption attribute are interpreted as figures.
-figure :: PandocMonad m => OrgParser m (F Blocks)
-figure = try $ do
- figAttrs <- blockAttributes
- src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
- Nothing -> mzero
- Just imgSrc -> do
- guard (isImageFilename imgSrc)
- let isFigure = not . isNothing $ blockAttrCaption figAttrs
- return $ imageBlock isFigure figAttrs imgSrc
- where
- selfTarget :: PandocMonad m => OrgParser m String
- selfTarget = try $ char '[' *> linkTarget <* char ']'
-
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
- imageBlock isFigure figAttrs imgSrc =
- let
- figName = fromMaybe mempty $ blockAttrName figAttrs
- figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
- figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
- figKeyVals = blockAttrKeyValues figAttrs
- attr = (figLabel, mempty, figKeyVals)
- figTitle = (if isFigure then withFigPrefix else id) figName
- in
- B.para . B.imageWith attr imgSrc figTitle <$> figCaption
-
- withFigPrefix :: String -> String
- withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
- then cs
- else "fig:" ++ cs
-
--- | Succeeds if looking at the end of the current paragraph
-endOfParagraph :: Monad m => OrgParser m ()
-endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-
-
---
--- Examples
---
-
--- | Example code marked up by a leading colon.
-example :: Monad m => OrgParser m (F Blocks)
-example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
- where
- exampleLine :: Monad m => OrgParser m String
- exampleLine = try $ exampleLineStart *> anyLine
-
-exampleCode :: String -> Blocks
-exampleCode = B.codeBlockWith ("", ["example"], [])
-
-
---
--- Comments, Options and Metadata
---
-
-specialLine :: PandocMonad m => OrgParser m (F Blocks)
-specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
-
-rawExportLine :: PandocMonad m => OrgParser m Blocks
-rawExportLine = try $ do
- metaLineStart
- key <- metaKey
- if key `elem` ["latex", "html", "texinfo", "beamer"]
- then B.rawBlock key <$> anyLine
- else mzero
-
-commentLine :: Monad m => OrgParser m Blocks
-commentLine = commentLineStart *> anyLine *> pure mempty
-
-
---
--- Tables
---
-data ColumnProperty = ColumnProperty
- { columnAlignment :: Maybe Alignment
- , columnRelWidth :: Maybe Int
- } deriving (Show, Eq)
-
-instance Default ColumnProperty where
- def = ColumnProperty Nothing Nothing
-
-data OrgTableRow = OrgContentRow (F [Blocks])
- | OrgAlignRow [ColumnProperty]
- | OrgHlineRow
-
--- OrgTable is strongly related to the pandoc table ADT. Using the same
--- (i.e. pandoc-global) ADT would mean that the reader would break if the
--- global structure was to be changed, which would be bad. The final table
--- should be generated using a builder function.
-data OrgTable = OrgTable
- { orgTableColumnProperties :: [ColumnProperty]
- , orgTableHeader :: [Blocks]
- , orgTableRows :: [[Blocks]]
- }
-
-table :: PandocMonad m => OrgParser m (F Blocks)
-table = try $ do
- blockAttrs <- blockAttributes
- lookAhead tableStart
- do
- rows <- tableRows
- let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
- return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
-
-orgToPandocTable :: OrgTable
- -> Inlines
- -> Blocks
-orgToPandocTable (OrgTable colProps heads lns) caption =
- let totalWidth = if any (not . isNothing) (map columnRelWidth colProps)
- then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
- else Nothing
- in B.table caption (map (convertColProp totalWidth) colProps) heads lns
- where
- convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double)
- convertColProp totalWidth colProp =
- let
- align' = fromMaybe AlignDefault $ columnAlignment colProp
- width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
- <$> (columnRelWidth colProp)
- <*> totalWidth
- in (align', width')
-
-tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
-tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
-
-tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
-tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
-
-tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
-tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
-
-tableAlignRow :: Monad m => OrgParser m OrgTableRow
-tableAlignRow = try $ do
- tableStart
- colProps <- many1Till columnPropertyCell newline
- -- Empty rows are regular (i.e. content) rows, not alignment rows.
- guard $ any (/= def) colProps
- return $ OrgAlignRow colProps
-
-columnPropertyCell :: Monad m => OrgParser m ColumnProperty
-columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
- where
- emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell)
- propCell = try $ ColumnProperty
- <$> (skipSpaces
- *> char '<'
- *> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
- <* char '>'
- <* emptyCell)
-
-tableAlignFromChar :: Monad m => OrgParser m Alignment
-tableAlignFromChar = try $
- choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
-
-tableHline :: Monad m => OrgParser m OrgTableRow
-tableHline = try $
- OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
-
-endOfCell :: Monad m => OrgParser m Char
-endOfCell = try $ char '|' <|> lookAhead newline
-
-rowsToTable :: [OrgTableRow]
- -> F OrgTable
-rowsToTable = foldM rowToContent emptyTable
- where emptyTable = OrgTable mempty mempty mempty
-
-normalizeTable :: OrgTable -> OrgTable
-normalizeTable (OrgTable colProps heads rows) =
- OrgTable colProps' heads rows
- where
- refRow = if heads /= mempty
- then heads
- else case rows of
- (r:_) -> r
- _ -> mempty
- cols = length refRow
- fillColumns base padding = take cols $ base ++ repeat padding
- colProps' = fillColumns colProps def
-
--- One or more horizontal rules after the first content line mark the previous
--- line as a header. All other horizontal lines are discarded.
-rowToContent :: OrgTable
- -> OrgTableRow
- -> F OrgTable
-rowToContent orgTable row =
- case row of
- OrgHlineRow -> return singleRowPromotedToHeader
- OrgAlignRow props -> return . setProperties $ props
- OrgContentRow cs -> appendToBody cs
- where
- singleRowPromotedToHeader :: OrgTable
- singleRowPromotedToHeader = case orgTable of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- orgTable{ orgTableHeader = b , orgTableRows = [] }
- _ -> orgTable
-
- setProperties :: [ColumnProperty] -> OrgTable
- setProperties ps = orgTable{ orgTableColumnProperties = ps }
-
- appendToBody :: F [Blocks] -> F OrgTable
- appendToBody frow = do
- newRow <- frow
- let oldRows = orgTableRows orgTable
- -- NOTE: This is an inefficient O(n) operation. This should be changed
- -- if performance ever becomes a problem.
- return orgTable{ orgTableRows = oldRows ++ [newRow] }
-
-
---
--- LaTeX fragments
---
-latexFragment :: Monad m => OrgParser m (F Blocks)
-latexFragment = try $ do
- envName <- latexEnvStart
- content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
- where
- c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
- , c
- , "\\end{", e, "}\n"
- ]
-
-latexEnd :: Monad m => String -> OrgParser m ()
-latexEnd envName = try $
- () <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
- <* blankline
-
-
---
--- Footnote defintions
---
-noteBlock :: PandocMonad m => OrgParser m (F Blocks)
-noteBlock = try $ do
- ref <- noteMarker <* skipSpaces
- content <- mconcat <$> blocksTillHeaderOrNote
- addToNotesTable (ref, content)
- return mempty
- where
- blocksTillHeaderOrNote =
- many1Till block (eof <|> () <$ lookAhead noteMarker
- <|> () <$ lookAhead headerStart)
-
--- Paragraphs or Plain text
-paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
-paraOrPlain = try $ do
- -- Make sure we are not looking at a headline
- notFollowedBy' (char '*' *> (oneOf " *"))
- ils <- inlines
- nl <- option False (newline *> return True)
- -- Read block as paragraph, except if we are in a list context and the block
- -- is directly followed by a list item, in which case the block is read as
- -- plain text.
- try (guard nl
- *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
- *> return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
-
-
---
--- list blocks
---
-
-list :: PandocMonad m => OrgParser m (F Blocks)
-list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-
-definitionList :: PandocMonad m => OrgParser m (F Blocks)
-definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.definitionList . fmap compactifyDL . sequence
- <$> many1 (definitionListItem $ bulletListStart' (Just n))
-
-bulletList :: PandocMonad m => OrgParser m (F Blocks)
-bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.bulletList . fmap compactify . sequence
- <$> many1 (listItem (bulletListStart' $ Just n))
-
-orderedList :: PandocMonad m => OrgParser m (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify . sequence
- <$> many1 (listItem orderedListStart)
-
-bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int
--- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing = do ind <- length <$> many spaceChar
- oneOf (bullets $ ind == 0)
- skipSpaces1
- return (ind + 1)
-bulletListStart' (Just n) = do count (n-1) spaceChar
- oneOf (bullets $ n == 1)
- many1 spaceChar
- return n
-
--- Unindented lists are legal, but they can't use '*' bullets.
--- We return n to maintain compatibility with the generic listItem.
-bullets :: Bool -> String
-bullets unindented = if unindented then "+-" else "*+-"
-
-definitionListItem :: PandocMonad m
- => OrgParser m Int
- -> OrgParser m (F (Inlines, [Blocks]))
-definitionListItem parseMarkerGetLength = try $ do
- markerLength <- parseMarkerGetLength
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
- line1 <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
- term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
- return $ (,) <$> term' <*> fmap (:[]) contents'
- where
- definitionMarker =
- spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: PandocMonad m
- => OrgParser m Int
- -> OrgParser m (F Blocks)
-listItem start = try . withContext ListItemState $ do
- markerLength <- try start
- firstLine <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
-
--- continuation of a list item - indented and separated by blankline or endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Monad m => Int
- -> OrgParser m String
-listContinuation markerLength = try $
- notFollowedBy' blankline
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
- where
- listLine = try $ indentWith markerLength *> anyLineNewline
-
- -- indent by specified number of spaces (or equiv. tabs)
- indentWith :: Monad m => Int -> OrgParser m String
- indentWith num = do
- tabStop <- getOption readerTabStop
- if num < tabStop
- then count num (char ' ')
- else choice [ try (count num (char ' '))
- , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-
--- | Parse any line, include the final newline in the output.
-anyLineNewline :: Monad m => OrgParser m String
-anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
deleted file mode 100644
index 391877c03..000000000
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ /dev/null
@@ -1,172 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode export options.
--}
-module Text.Pandoc.Readers.Org.ExportSettings
- ( exportSettings
- ) where
-
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.Maybe ( listToMaybe )
-
--- | Read and handle space separated org-mode export settings.
-exportSettings :: Monad m => OrgParser m ()
-exportSettings = void $ sepBy spaces exportSetting
-
--- | Setter function for export settings.
-type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
-
--- | Read and process a single org-mode export option.
-exportSetting :: Monad m => OrgParser m ()
-exportSetting = choice
- [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val })
- , booleanSetting "'" (\val es -> es { exportSmartQuotes = val })
- , booleanSetting "*" (\val es -> es { exportEmphasizedText = val })
- , booleanSetting "-" (\val es -> es { exportSpecialStrings = val })
- , ignoredSetting ":"
- , ignoredSetting "<"
- , ignoredSetting "\\n"
- , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val })
- , booleanSetting "author" (\val es -> es { exportWithAuthor = val })
- , ignoredSetting "c"
- -- org-mode allows the special value `comment` for creator, which we'll
- -- interpret as true as it doesn't make sense in the context of Pandoc.
- , booleanSetting "creator" (\val es -> es { exportWithCreator = val })
- , complementableListSetting "d" (\val es -> es { exportDrawers = val })
- , ignoredSetting "date"
- , ignoredSetting "e"
- , booleanSetting "email" (\val es -> es { exportWithEmail = val })
- , ignoredSetting "f"
- , integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
- , ignoredSetting "inline"
- , ignoredSetting "num"
- , ignoredSetting "p"
- , ignoredSetting "pri"
- , ignoredSetting "prop"
- , ignoredSetting "stat"
- , ignoredSetting "tags"
- , ignoredSetting "tasks"
- , ignoredSetting "tex"
- , ignoredSetting "timestamp"
- , ignoredSetting "title"
- , ignoredSetting "toc"
- , booleanSetting "todo" (\val es -> es { exportWithTodoKeywords = val })
- , ignoredSetting "|"
- ] <?> "export setting"
-
-genericExportSetting :: Monad m
- => OrgParser m a
- -> String
- -> ExportSettingSetter a
- -> OrgParser m ()
-genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
- value <- optionParser
- updateState $ modifyExportSettings value
- where
- modifyExportSettings val st =
- st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-
--- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
-booleanSetting = genericExportSetting elispBoolean
-
--- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
-integerSetting = genericExportSetting parseInt
- where
- parseInt = try $
- many1 digit >>= maybe mzero (return . fst) . listToMaybe . reads
-
--- | Either the string "headline" or an elisp boolean and treated as an
--- @ArchivedTreesOption@.
-archivedTreeSetting :: Monad m
- => String
- -> ExportSettingSetter ArchivedTreesOption
- -> OrgParser m ()
-archivedTreeSetting =
- genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean
- where
- archivedTreesHeadlineSetting = try $ do
- _ <- string "headline"
- lookAhead (newline <|> spaceChar)
- return ArchivedTreesHeadlineOnly
-
- archivedTreesBoolean = try $ do
- exportBool <- elispBoolean
- return $
- if exportBool
- then ArchivedTreesExport
- else ArchivedTreesNoExport
-
--- | A list or a complement list (i.e. a list starting with `not`).
-complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
- -> OrgParser m ()
-complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
- , Right <$> stringList
- , (\b -> if b then Left [] else Right []) <$> elispBoolean
- ]
- where
- -- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
- stringList = try $
- char '('
- *> sepBy elispString spaces
- <* char ')'
-
- -- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
- string "(not "
- *> sepBy elispString spaces
- <* char ')'
-
- elispString :: Monad m => OrgParser m String
- elispString = try $
- char '"'
- *> manyTill alphaNum (char '"')
-
--- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-
--- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
--- interpreted as true.
-elispBoolean :: Monad m => OrgParser m Bool
-elispBoolean = try $ do
- value <- many1 nonspaceChar
- return $ case map toLower value of
- "nil" -> False
- "{}" -> False
- "()" -> False
- _ -> True
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
deleted file mode 100644
index f3671641a..000000000
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ /dev/null
@@ -1,880 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode inline elements.
--}
-module Text.Pandoc.Readers.Org.Inlines
- ( inline
- , inlines
- , addToNotesTable
- , linkTarget
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines )
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
-import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
-import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Text.Pandoc.Class (PandocMonad)
-
-import Prelude hiding (sequence)
-import Control.Monad ( guard, mplus, mzero, when, void )
-import Control.Monad.Trans ( lift )
-import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( intersperse )
-import Data.Maybe ( fromMaybe )
-import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Data.Traversable (sequence)
-
---
--- Functions acting on the parser state
---
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
-recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-
-pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m ()
-pushToInlineCharStack c = updateState $ \s ->
- s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
-
-popInlineCharStack :: PandocMonad m => OrgParser m ()
-popInlineCharStack = updateState $ \s ->
- s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
-
-surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char]
-surroundingEmphasisChar =
- take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
-
-startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m ()
-startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Just maxNewlines }
-
-decEmphasisNewlinesCount :: PandocMonad m => OrgParser m ()
-decEmphasisNewlinesCount = updateState $ \s ->
- s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
-
-newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool
-newlinesCountWithinLimits = do
- st <- getState
- return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
-
-resetEmphasisNewlines :: PandocMonad m => OrgParser m ()
-resetEmphasisNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Nothing }
-
-addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m ()
-addToNotesTable note = do
- oldnotes <- orgStateNotes' <$> getState
- updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-
--- | Parse a single Org-mode inline element
-inline :: PandocMonad m => OrgParser m (F Inlines)
-inline =
- choice [ whitespace
- , linebreak
- , cite
- , footnote
- , linkOrImage
- , anchor
- , inlineCodeBlock
- , str
- , endline
- , emphasizedText
- , code
- , math
- , displayMath
- , verbatim
- , subscript
- , superscript
- , inlineLaTeX
- , exportSnippet
- , smart
- , symbol
- ] <* (guard =<< newlinesCountWithinLimits)
- <?> "inline"
-
--- | Read the rest of the input as inlines.
-inlines :: PandocMonad m => OrgParser m (F Inlines)
-inlines = trimInlinesF . mconcat <$> many1 inline
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~"
-
-
-whitespace :: PandocMonad m => OrgParser m (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- <?> "whitespace"
-
-linebreak :: PandocMonad m => OrgParser m (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-
-str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
- <* updateLastStrPos
-
--- | An endline character that can be treated as a space, not a structural
--- break. This should reflect the values of the Emacs variable
--- @org-element-pagaraph-separate@.
-endline :: PandocMonad m => OrgParser m (F Inlines)
-endline = try $ do
- newline
- notFollowedBy' endOfBlock
- decEmphasisNewlinesCount
- guard =<< newlinesCountWithinLimits
- updateLastPreCharPos
- return . return $ B.softbreak
-
-
---
--- Citations
---
-
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax. The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write. The
--- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others. It's dubbed Berkeley syntax due the place of activity of its main
--- contributors. All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
-
-cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
- guardEnabled Ext_citations
- (cs, raw) <- withRaw $ choice
- [ pandocOrgCite
- , orgRefCite
- , berkeleyTextualCite
- ]
- return $ (flip B.cite (B.text raw)) <$> cs
-
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
- char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
-
-orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
-orgRefCite = try $ choice
- [ normalOrgRefCite
- , fmap (:[]) <$> linkLikeOrgRefCite
- ]
-
-normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation])
-normalOrgRefCite = try $ do
- mode <- orgRefCiteMode
- firstCitation <- orgRefCiteList mode
- moreCitations <- many (try $ char ',' *> orgRefCiteList mode)
- return . sequence $ firstCitation : moreCitations
- where
- -- | A list of org-ref style citation keys, parsed as citation of the given
- -- citation mode.
- orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation)
- orgRefCiteList citeMode = try $ do
- key <- orgRefCiteKey
- returnF $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = citeMode
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
- bcl <- berkeleyCitationList
- return $ do
- parens <- berkeleyCiteParens <$> bcl
- prefix <- berkeleyCiteCommonPrefix <$> bcl
- suffix <- berkeleyCiteCommonSuffix <$> bcl
- citationList <- berkeleyCiteCitations <$> bcl
- return $
- if parens
- then toCite
- . maybe id (\p -> alterFirst (prependPrefix p)) prefix
- . maybe id (\s -> alterLast (appendSuffix s)) suffix
- $ citationList
- else maybe mempty (<> " ") prefix
- <> (toListOfCites $ map toInTextMode citationList)
- <> maybe mempty (", " <>) suffix
- where
- toCite :: [Citation] -> Inlines
- toCite cs = B.cite cs mempty
-
- toListOfCites :: [Citation] -> Inlines
- toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
- toInTextMode :: Citation -> Citation
- toInTextMode c = c { citationMode = AuthorInText }
-
- alterFirst, alterLast :: (a -> a) -> [a] -> [a]
- alterFirst _ [] = []
- alterFirst f (c:cs) = (f c):cs
- alterLast f = reverse . alterFirst f . reverse
-
- prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
- prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
- appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
- , berkeleyCiteCommonPrefix :: Maybe Inlines
- , berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
- }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
- char '['
- parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
- char ':'
- skipSpaces
- commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
- citations <- citeList
- commonSuffix <- optionMaybe (try $ citationListPart)
- char ']'
- return (BerkeleyCitationList parens
- <$> sequence commonPrefix
- <*> sequence commonSuffix
- <*> citations)
- where
- citationListPart :: PandocMonad m => OrgParser m (F Inlines)
- citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' citeKey
- notFollowedBy (oneOf ";]")
- inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey
- returnF . return $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like. However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
--- enclosedByPair '[' ']' berkeleyTextualCite
-
--- | Read a link-like org-ref style citation. The citation includes pre and
--- post text. However, multiple citations are not possible due to limitations
--- in the syntax.
-linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation)
-linkLikeOrgRefCite = try $ do
- _ <- string "[["
- mode <- orgRefCiteMode
- key <- orgRefCiteKey
- _ <- string "]["
- pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::")
- spc <- option False (True <$ spaceChar)
- suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]")
- return $ do
- pre' <- pre
- suf' <- suf
- return Citation
- { citationId = key
- , citationPrefix = B.toList pre'
- , citationSuffix = B.toList (if spc then B.space <> suf' else suf')
- , citationMode = mode
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- | Read a citation key. The characters allowed in citation keys are taken
--- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
-orgRefCiteKey = try . many1 . satisfy $ \c ->
- isAlphaNum c || c `elem` ("-_:\\./"::String)
-
--- | Supported citation types. Only a small subset of org-ref types is
--- supported for now. TODO: rewrite this, use LaTeX reader as template.
-orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode
-orgRefCiteMode =
- choice $ map (\(s, mode) -> mode <$ try (string s <* char ':'))
- [ ("cite", AuthorInText)
- , ("citep", NormalCitation)
- , ("citep*", NormalCitation)
- , ("citet", AuthorInText)
- , ("citet*", AuthorInText)
- , ("citeyear", SuppressAuthor)
- ]
-
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
-footnote :: PandocMonad m => OrgParser m (F Inlines)
-footnote = try $ inlineNote <|> referencedNote
-
-inlineNote :: PandocMonad m => OrgParser m (F Inlines)
-inlineNote = try $ do
- string "[fn:"
- ref <- many alphaNum
- char ':'
- note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- when (not $ null ref) $
- addToNotesTable ("fn:" ++ ref, note)
- return $ B.note <$> note
-
-referencedNote :: PandocMonad m => OrgParser m (F Inlines)
-referencedNote = try $ do
- ref <- noteMarker
- return $ do
- notes <- asksF orgStateNotes'
- case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- let contents' = runF contents st{ orgStateNotes' = [] }
- return $ B.note contents'
-
-linkOrImage :: PandocMonad m => OrgParser m (F Inlines)
-linkOrImage = explicitOrImageLink
- <|> selflinkOrImage
- <|> angleLink
- <|> plainLink
- <?> "link or image"
-
-explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines)
-explicitOrImageLink = try $ do
- char '['
- srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
- title <- enclosedRaw (char '[') (char ']')
- title' <- parseFromString (mconcat <$> many inline) title
- char ']'
- return $ do
- src <- srcF
- case cleanLinkString title of
- Just imgSrc | isImageFilename imgSrc ->
- pure $ B.link src "" $ B.image imgSrc mempty mempty
- _ ->
- linkToInlinesF src =<< title'
-
-selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
-selflinkOrImage = try $ do
- src <- char '[' *> linkTarget <* char ']'
- return $ linkToInlinesF src (B.str src)
-
-plainLink :: PandocMonad m => OrgParser m (F Inlines)
-plainLink = try $ do
- (orig, src) <- uri
- returnF $ B.link src "" (B.str orig)
-
-angleLink :: PandocMonad m => OrgParser m (F Inlines)
-angleLink = try $ do
- char '<'
- link <- plainLink
- char '>'
- return link
-
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
-possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-
-applyCustomLinkFormat :: String -> OrgParser m (F String)
-applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
- return $ do
- formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
-
--- | Take a link and return a function which produces new inlines when given
--- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- (Just cleanedLink) -> if isImageFilename cleanedLink
- then const . pure $ B.image cleanedLink "" ""
- else pure . B.link cleanedLink ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
-internalLink link title = do
- anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then return $ B.link ('#':link) "" title
- else return $ B.emph title
-
--- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
--- @anchor-id@ set as id. Legal anchors in org-mode are defined through
--- @org-target-regexp@, which is fairly liberal. Since no link is created if
--- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
--- an anchor.
-
-anchor :: PandocMonad m => OrgParser m (F Inlines)
-anchor = try $ do
- anchorId <- parseAnchor
- recordAnchorId anchorId
- returnF $ B.spanWith (solidify anchorId, [], []) mempty
- where
- parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
- <* string ">>"
- <* skipSpaces
-
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
--- the org function @org-export-solidify-link-text@.
-
-solidify :: String -> String
-solidify = map replaceSpecialChar
- where replaceSpecialChar c
- | isAlphaNum c = c
- | c `elem` ("_.-:" :: String) = c
- | otherwise = '-'
-
--- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
-inlineCodeBlock = try $ do
- string "src_"
- lang <- many1 orgArgWordChar
- opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
- let attrClasses = [translateLang lang, rundocBlockClass]
- let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
- inlineBlockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgInlineParamValue
- return (argKey, paramValue)
-
- orgInlineParamValue :: PandocMonad m => OrgParser m String
- orgInlineParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
- <* skipSpaces
-
-
-emphasizedText :: PandocMonad m => OrgParser m (F Inlines)
-emphasizedText = do
- state <- getState
- guard . exportEmphasizedText . orgStateExportSettings $ state
- try $ choice
- [ emph
- , strong
- , strikeout
- , underline
- ]
-
-enclosedByPair :: PandocMonad m
- => Char -- ^ opening char
- -> Char -- ^ closing char
- -> OrgParser m a -- ^ parser
- -> OrgParser m [a]
-enclosedByPair s e p = char s *> many1Till p (char e)
-
-emph :: PandocMonad m => OrgParser m (F Inlines)
-emph = fmap B.emph <$> emphasisBetween '/'
-
-strong :: PandocMonad m => OrgParser m (F Inlines)
-strong = fmap B.strong <$> emphasisBetween '*'
-
-strikeout :: PandocMonad m => OrgParser m (F Inlines)
-strikeout = fmap B.strikeout <$> emphasisBetween '+'
-
--- There is no underline, so we use strong instead.
-underline :: PandocMonad m => OrgParser m (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
-
-verbatim :: PandocMonad m => OrgParser m (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
-
-code :: PandocMonad m => OrgParser m (F Inlines)
-code = return . B.code <$> verbatimBetween '~'
-
-subscript :: PandocMonad m => OrgParser m (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
-
-superscript :: PandocMonad m => OrgParser m (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
-
-math :: PandocMonad m => OrgParser m (F Inlines)
-math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
- , rawMathBetween "\\(" "\\)"
- ]
-
-displayMath :: PandocMonad m => OrgParser m (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
- , rawMathBetween "$$" "$$"
- ]
-
-updatePositions :: PandocMonad m
- => Char
- -> OrgParser m Char
-updatePositions c = do
- when (c `elem` emphasisPreChars) updateLastPreCharPos
- when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
- return c
-
-symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
-
-emphasisBetween :: PandocMonad m
- => Char
- -> OrgParser m (F Inlines)
-emphasisBetween c = try $ do
- startEmphasisNewlinesCounting emphasisAllowedNewlines
- res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
- isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
- when isTopLevelEmphasis
- resetEmphasisNewlines
- return res
-
-verbatimBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-verbatimBetween c = try $
- emphasisStart c *>
- many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
- where
- verbatimChar = noneOf "\n\r" >>= updatePositions
-
--- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-mathStringBetween c = try $ do
- mathStart c
- body <- many1TillNOrLessNewlines mathAllowedNewlines
- (noneOf (c:"\n\r"))
- (lookAhead $ mathEnd c)
- final <- mathEnd c
- return $ body ++ [final]
-
--- | Parse a single character between @c@ using math rules
-math1CharBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-math1CharBetween c = try $ do
- char c
- res <- noneOf $ c:mathForbiddenBorderChars
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
-
-rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-
--- | Parses the start (opening character) of emphasis
-emphasisStart :: PandocMonad m => Char -> OrgParser m Char
-emphasisStart c = try $ do
- guard =<< afterEmphasisPreChar
- guard =<< notAfterString
- char c
- lookAhead (noneOf emphasisForbiddenBorderChars)
- pushToInlineCharStack c
- -- nested inlines are allowed, so mark this position as one which might be
- -- followed by another inline.
- updateLastPreCharPos
- return c
-
--- | Parses the closing character of emphasis
-emphasisEnd :: PandocMonad m => Char -> OrgParser m Char
-emphasisEnd c = try $ do
- guard =<< notAfterForbiddenBorderChar
- char c
- eof <|> () <$ lookAhead acceptablePostChars
- updateLastStrPos
- popInlineCharStack
- return c
- where acceptablePostChars =
- surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
-
-mathStart :: PandocMonad m => Char -> OrgParser m Char
-mathStart c = try $
- char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
-
-mathEnd :: PandocMonad m => Char -> OrgParser m Char
-mathEnd c = try $ do
- res <- noneOf (c:mathForbiddenBorderChars)
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return res
-
-
-enclosedInlines :: PandocMonad m => OrgParser m a
- -> OrgParser m b
- -> OrgParser m (F Inlines)
-enclosedInlines start end = try $
- trimInlinesF . mconcat <$> enclosed start end inline
-
-enclosedRaw :: PandocMonad m => OrgParser m a
- -> OrgParser m b
- -> OrgParser m String
-enclosedRaw start end = try $
- start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
- spanningTwoLines = try $
- anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
-
--- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
--- newlines.
-many1TillNOrLessNewlines :: PandocMonad m => Int
- -> OrgParser m Char
- -> OrgParser m a
- -> OrgParser m String
-many1TillNOrLessNewlines n p end = try $
- nMoreLines (Just n) mempty >>= oneOrMore
- where
- nMoreLines Nothing cs = return cs
- nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
- nMoreLines k cs = try $ (final k cs <|> rest k cs)
- >>= uncurry nMoreLines
- final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
- rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
- finalLine = try $ manyTill p end
- minus1 k = k - 1
- oneOrMore cs = guard (not $ null cs) *> return cs
-
--- Org allows customization of the way it reads emphasis. We use the defaults
--- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
--- for details).
-
--- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "\t \"'({"
-
--- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "\t\n !\"'),-.:;?\\}"
-
--- | Chars not allowed at the (inner) border of emphasis
-emphasisForbiddenBorderChars :: [Char]
-emphasisForbiddenBorderChars = "\t\n\r \"',"
-
--- | The maximum number of newlines within
-emphasisAllowedNewlines :: Int
-emphasisAllowedNewlines = 1
-
--- LaTeX-style math: see `org-latex-regexps` for details
-
--- | Chars allowed after an inline ($...$) math statement
-mathPostChars :: [Char]
-mathPostChars = "\t\n \"'),-.:;?"
-
--- | Chars not allowed at the (inner) border of math
-mathForbiddenBorderChars :: [Char]
-mathForbiddenBorderChars = "\t\n\r ,;.$"
-
--- | Maximum number of newlines in an inline math statement
-mathAllowedNewlines :: Int
-mathAllowedNewlines = 2
-
--- | Whether we are right behind a char allowed before emphasis
-afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool
-afterEmphasisPreChar = do
- pos <- getPosition
- lastPrePos <- orgStateLastPreCharPos <$> getState
- return . fromMaybe True $ (== pos) <$> lastPrePos
-
--- | Whether the parser is right after a forbidden border char
-notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool
-notAfterForbiddenBorderChar = do
- pos <- getPosition
- lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
- return $ lastFBCPos /= Just pos
-
--- | Read a sub- or superscript expression
-subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
-subOrSuperExpr = try $
- choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
- , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
- ] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
-
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
- state <- getState
- guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
- ]
-
-inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
-inlineLaTeX = try $ do
- cmd <- inlineLaTeXCommand
- ils <- (lift . lift) $ parseAsInlineLaTeX cmd
- maybe mzero returnF $
- parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils
- where
- parseAsMath :: String -> Maybe Inlines
- parseAsMath cs = B.fromList <$> texMathToPandoc cs
-
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
- parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
-
- parseAsMathMLSym :: String -> Maybe Inlines
- parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
- -- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
-
- state :: ParserState
- state = def{ stateOptions = def{ readerExtensions =
- enableExtension Ext_raw_tex (readerExtensions def) } }
-
- texMathToPandoc :: String -> Maybe [Inline]
- texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
-
-maybeRight :: Either a b -> Maybe b
-maybeRight = either (const Nothing) Just
-
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
-inlineLaTeXCommand = try $ do
- rest <- getInput
- parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
- case parsed of
- Right (RawInline _ cs) -> do
- -- drop any trailing whitespace, those are not be part of the command as
- -- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
- count len anyChar
- return cmdNoSpc
- _ -> mzero
-
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
-exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
-exportSnippet = try $ do
- string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
- returnF $ B.rawInline format snippet
-
-smart :: PandocMonad m => OrgParser m (F Inlines)
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
- where
- orgDash = do
- guard =<< getExportSetting exportSpecialStrings
- dash <* updatePositions '-'
- orgEllipses = do
- guard =<< getExportSetting exportSpecialStrings
- ellipses <* updatePositions '.'
- orgApostrophe =
- (char '\'' <|> char '\8217') <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- *> return (B.str "\x2019")
-
-singleQuoted :: PandocMonad m => OrgParser m (F Inlines)
-singleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
- singleQuoteStart
- updatePositions '\''
- withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline (singleQuoteEnd <* updatePositions '\'')
-
--- doubleQuoted will handle regular double-quoted sections, as well
--- as dialogues with an open double-quote without a close double-quote
--- in the same paragraph.
-doubleQuoted :: PandocMonad m => OrgParser m (F Inlines)
-doubleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
- doubleQuoteStart
- updatePositions '"'
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
deleted file mode 100644
index 2f4e21248..000000000
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ /dev/null
@@ -1,218 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
-{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Meta
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode meta declarations.
--}
-module Text.Pandoc.Readers.Org.Meta
- ( metaExport
- , metaKey
- , metaLine
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Blocks, Inlines )
-import Text.Pandoc.Class ( PandocMonad )
-import Text.Pandoc.Definition
-
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.List ( intersperse )
-import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Network.HTTP ( urlEncode )
-
--- | Returns the current meta, respecting export options.
-metaExport :: Monad m => OrgParser m (F Meta)
-metaExport = do
- st <- getState
- let settings = orgStateExportSettings st
- return $ (if exportWithAuthor settings then id else removeMeta "author")
- . (if exportWithCreator settings then id else removeMeta "creator")
- . (if exportWithEmail settings then id else removeMeta "email")
- <$> orgStateMeta st
-
-removeMeta :: String -> Meta -> Meta
-removeMeta key meta' =
- let metaMap = unMeta meta'
- in Meta $ M.delete key metaMap
-
--- | Parse and handle a single line containing meta information
--- The order, in which blocks are tried, makes sure that we're not looking at
--- the beginning of a block, so we don't need to check for it
-metaLine :: PandocMonad m => OrgParser m Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-
-declarationLine :: PandocMonad m => OrgParser m ()
-declarationLine = try $ do
- key <- map toLower <$> metaKey
- (key', value) <- metaValue key
- updateState $ \st ->
- let meta' = B.setMeta key' <$> value <*> pure nullMeta
- in st { orgStateMeta = meta' <> orgStateMeta st }
-
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
-
-metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue))
-metaValue key =
- let inclKey = "header-includes"
- in case key of
- "author" -> (key,) <$> metaInlinesCommaSeparated
- "title" -> (key,) <$> metaInlines
- "date" -> (key,) <$> metaInlines
- "header-includes" -> (key,) <$> accumulatingList key metaInlines
- "latex_header" -> (inclKey,) <$>
- accumulatingList inclKey (metaExportSnippet "latex")
- "latex_class" -> ("documentclass",) <$> metaString
- -- Org-mode expects class options to contain the surrounding brackets,
- -- pandoc does not.
- "latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
- "html_head" -> (inclKey,) <$>
- accumulatingList inclKey (metaExportSnippet "html")
- _ -> (key,) <$> metaString
-
-metaInlines :: PandocMonad m => OrgParser m (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
-
-metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
-metaInlinesCommaSeparated = do
- authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
- newline
- authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
- let toMetaInlines = MetaInlines . B.toList
- return $ MetaList . map toMetaInlines <$> sequence authors
-
-metaString :: Monad m => OrgParser m (F MetaValue)
-metaString = metaModifiedString id
-
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
-metaModifiedString f = return . MetaString . f <$> anyLine
-
--- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
-metaExportSnippet format =
- return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-
--- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
- -> OrgParser m (F MetaValue)
- -> OrgParser m (F MetaValue)
-accumulatingList key p = do
- value <- p
- meta' <- orgStateMeta <$> getState
- return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value
- where curList m = case lookupMeta key m of
- Just (MetaList ms) -> ms
- Just x -> [x]
- _ -> []
-
---
--- export options
---
-optionLine :: Monad m => OrgParser m ()
-optionLine = try $ do
- key <- metaKey
- case key of
- "link" -> parseLinkFormat >>= uncurry addLinkFormat
- "options" -> exportSettings
- "todo" -> todoSequence >>= updateState . registerTodoSequence
- "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
- "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
- _ -> mzero
-
-addLinkFormat :: Monad m => String
- -> (String -> String)
- -> OrgParser m ()
-addLinkFormat key formatter = updateState $ \s ->
- let fs = orgStateLinkFormatters s
- in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
-parseLinkFormat :: Monad m => OrgParser m ((String, String -> String))
-parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
- linkSubst <- parseFormat
- return (linkType, linkSubst)
-
--- | An ad-hoc, single-argument-only implementation of a printf-style format
--- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
-parseFormat = try $ do
- replacePlain <|> replaceUrl <|> justAppend
- where
- -- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
- <$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
- <$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
-
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
-
-inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
-
---
--- ToDo Sequences and Keywords
---
-todoSequence :: Monad m => OrgParser m TodoSequence
-todoSequence = try $ do
- todoKws <- todoKeywords
- doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
- newline
- -- There must be at least one DONE keyword. The last TODO keyword is taken if
- -- necessary.
- case doneKws of
- Just done -> return $ keywordsToSequence todoKws done
- Nothing -> case reverse todoKws of
- [] -> mzero -- no keywords present
- (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
-
- where
- todoKeywords :: Monad m => OrgParser m [String]
- todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
- endOfKeywords = todoDoneSep <|> void newline
- in manyTill keyword (lookAhead endOfKeywords)
-
- todoDoneSep :: Monad m => OrgParser m ()
- todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
-
- keywordsToSequence :: [String] -> [String] -> TodoSequence
- keywordsToSequence todo done =
- let todoMarkers = map (TodoMarker Todo) todo
- doneMarkers = map (TodoMarker Done) done
- in todoMarkers ++ doneMarkers
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
deleted file mode 100644
index 181dd1d5c..000000000
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ /dev/null
@@ -1,259 +0,0 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Define the Org-mode parser state.
--}
-module Text.Pandoc.Readers.Org.ParserState
- ( OrgParserState (..)
- , OrgParserLocal (..)
- , OrgNoteRecord
- , HasReaderOptions (..)
- , HasQuoteContext (..)
- , TodoMarker (..)
- , TodoSequence
- , TodoState (..)
- , activeTodoMarkers
- , registerTodoSequence
- , F(..)
- , askF
- , asksF
- , trimInlinesF
- , runF
- , returnF
- , ExportSettings (..)
- , ArchivedTreesOption (..)
- , optionsToParserState
- ) where
-
-import Control.Monad (liftM, liftM2)
-import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local)
-
-import Data.Default (Default(..))
-import qualified Data.Map as M
-import qualified Data.Set as Set
-
-import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
-import Text.Pandoc.Definition ( Meta(..), nullMeta )
-import Text.Pandoc.Options ( ReaderOptions(..) )
-import Text.Pandoc.Parsing ( HasHeaderMap(..)
- , HasIdentifierList(..)
- , HasLastStrPosition(..)
- , HasQuoteContext(..)
- , HasReaderOptions(..)
- , ParserContext(..)
- , QuoteContext(..)
- , SourcePos )
-
--- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
--- | Table of footnotes
-type OrgNoteTable = [OrgNoteRecord]
--- | Map of functions for link transformations. The map key is refers to the
--- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
-
--- | The states in which a todo item can be
-data TodoState = Todo | Done
- deriving (Eq, Ord, Show)
-
--- | A ToDo keyword like @TODO@ or @DONE@.
-data TodoMarker = TodoMarker
- { todoMarkerState :: TodoState
- , todoMarkerName :: String
- }
- deriving (Show, Eq)
-
--- | Collection of todo markers in the order in which items should progress
-type TodoSequence = [TodoMarker]
-
--- | Org-mode parser state
-data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
- , orgStateEmphasisCharStack :: [Char]
- , orgStateEmphasisNewlines :: Maybe Int
- , orgStateExportSettings :: ExportSettings
- , orgStateHeaderMap :: M.Map Inlines String
- , orgStateIdentifiers :: Set.Set String
- , orgStateLastForbiddenCharPos :: Maybe SourcePos
- , orgStateLastPreCharPos :: Maybe SourcePos
- , orgStateLastStrPos :: Maybe SourcePos
- , orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMeta :: F Meta
- , orgStateNotes' :: OrgNoteTable
- , orgStateOptions :: ReaderOptions
- , orgStateParserContext :: ParserContext
- , orgStateTodoSequences :: [TodoSequence]
- }
-
-data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
-
-instance Default OrgParserLocal where
- def = OrgParserLocal NoQuote
-
-instance HasReaderOptions OrgParserState where
- extractReaderOptions = orgStateOptions
-
-instance HasLastStrPosition OrgParserState where
- getLastStrPos = orgStateLastStrPos
- setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
-
-instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where
- getQuoteContext = asks orgLocalQuoteContext
- withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
-
-instance HasIdentifierList OrgParserState where
- extractIdentifierList = orgStateIdentifiers
- updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
-
-instance HasHeaderMap OrgParserState where
- extractHeaderMap = orgStateHeaderMap
- updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
-
-instance Default OrgParserState where
- def = defaultOrgParserState
-
-defaultOrgParserState :: OrgParserState
-defaultOrgParserState = OrgParserState
- { orgStateAnchorIds = []
- , orgStateEmphasisCharStack = []
- , orgStateEmphasisNewlines = Nothing
- , orgStateExportSettings = def
- , orgStateHeaderMap = M.empty
- , orgStateIdentifiers = Set.empty
- , orgStateLastForbiddenCharPos = Nothing
- , orgStateLastPreCharPos = Nothing
- , orgStateLastStrPos = Nothing
- , orgStateLinkFormatters = M.empty
- , orgStateMeta = return nullMeta
- , orgStateNotes' = []
- , orgStateOptions = def
- , orgStateParserContext = NullState
- , orgStateTodoSequences = []
- }
-
-optionsToParserState :: ReaderOptions -> OrgParserState
-optionsToParserState opts =
- def { orgStateOptions = opts }
-
-registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState
-registerTodoSequence todoSeq st =
- let curSeqs = orgStateTodoSequences st
- in st{ orgStateTodoSequences = todoSeq : curSeqs }
-
--- | Get the current todo/done sequences. If no custom todo sequences have been
--- defined, return a list containing just the default todo/done sequence.
-activeTodoSequences :: OrgParserState -> [TodoSequence]
-activeTodoSequences st =
- let curSeqs = orgStateTodoSequences st
- in if null curSeqs
- then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]]
- else curSeqs
-
-activeTodoMarkers :: OrgParserState -> TodoSequence
-activeTodoMarkers = concat . activeTodoSequences
-
-
---
--- Export Settings
---
-
--- | Options for the way archived trees are handled.
-data ArchivedTreesOption =
- ArchivedTreesExport -- ^ Export the complete tree
- | ArchivedTreesNoExport -- ^ Exclude archived trees from exporting
- | ArchivedTreesHeadlineOnly -- ^ Export only the headline, discard the contents
-
--- | Export settings <http://orgmode.org/manual/Export-settings.html>
--- These settings can be changed via OPTIONS statements.
-data ExportSettings = ExportSettings
- { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
- -- ^ Specify drawer names which should be exported. @Left@ names are
- -- explicitly excluded from the resulting output while @Right@ means that
- -- only the listed drawer names should be included.
- , exportEmphasizedText :: Bool -- ^ Parse emphasized text
- , exportHeadlineLevels :: Int
- -- ^ Maximum depth of headlines, deeper headlines are convert to list
- , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
- , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
- , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
- , exportWithAuthor :: Bool -- ^ Include author in final meta-data
- , exportWithCreator :: Bool -- ^ Include creator in final meta-data
- , exportWithEmail :: Bool -- ^ Include email in final meta-data
- , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
- }
-
-instance Default ExportSettings where
- def = defaultExportSettings
-
-defaultExportSettings :: ExportSettings
-defaultExportSettings = ExportSettings
- { exportArchivedTrees = ArchivedTreesHeadlineOnly
- , exportDrawers = Left ["LOGBOOK"]
- , exportEmphasizedText = True
- , exportHeadlineLevels = 3
- , exportSmartQuotes = True
- , exportSpecialStrings = True
- , exportSubSuperscripts = True
- , exportWithAuthor = True
- , exportWithCreator = True
- , exportWithEmail = True
- , exportWithTodoKeywords = True
- }
-
-
---
--- Parser state reader
---
-
--- | Reader monad wrapping the parser state. This is used to delay evaluation
--- until all relevant information has been parsed and made available in the
--- parser state. See also the newtype of the same name in
--- Text.Pandoc.Parsing.
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Functor, Applicative, Monad)
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
-
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
-
-askF :: F OrgParserState
-askF = F ask
-
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
-returnF :: Monad m => a -> m (F a)
-returnF = return . return
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
deleted file mode 100644
index 1eb8a3b00..000000000
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ /dev/null
@@ -1,217 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Org-mode parsing utilities.
-
-Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
-functions are adapted to Org-mode specific functionality.
--}
-module Text.Pandoc.Readers.Org.Parsing
- ( OrgParser
- , anyLine
- , blanklines
- , newline
- , parseFromString
- , skipSpaces1
- , inList
- , withContext
- , getExportSetting
- , updateLastForbiddenCharPos
- , updateLastPreCharPos
- , orgArgKey
- , orgArgWord
- , orgArgWordChar
- -- * Re-exports from Text.Pandoc.Parser
- , ParserContext (..)
- , many1Till
- , notFollowedBy'
- , spaceChar
- , nonspaceChar
- , skipSpaces
- , blankline
- , enclosed
- , stringAnyCase
- , charsInBalanced
- , uri
- , withRaw
- , readWithM
- , guardEnabled
- , updateLastStrPos
- , notAfterString
- , ParserState (..)
- , registerHeader
- , QuoteContext (..)
- , singleQuoteStart
- , singleQuoteEnd
- , doubleQuoteStart
- , doubleQuoteEnd
- , dash
- , ellipses
- , citeKey
- -- * Re-exports from Text.Pandoc.Parsec
- , runParser
- , runParserT
- , getInput
- , char
- , letter
- , digit
- , alphaNum
- , skipMany1
- , spaces
- , anyChar
- , satisfy
- , string
- , count
- , eof
- , noneOf
- , oneOf
- , lookAhead
- , notFollowedBy
- , many
- , many1
- , manyTill
- , (<|>)
- , (<?>)
- , choice
- , try
- , sepBy
- , sepBy1
- , sepEndBy1
- , option
- , optional
- , optionMaybe
- , getState
- , updateState
- , SourcePos
- , getPosition
- ) where
-
-import Text.Pandoc.Readers.Org.ParserState
-
-import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
- , parseFromString )
-
-import Control.Monad ( guard )
-import Control.Monad.Reader ( ReaderT )
-
--- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
-
---
--- Adaptions and specializations of parsing utilities
---
-
--- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
-anyLine =
- P.anyLine
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
-parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
- updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
- result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
- return result
-
--- | Skip one or more tab or space characters.
-skipSpaces1 :: Monad m => OrgParser m ()
-skipSpaces1 = skipMany1 spaceChar
-
--- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: Monad m => OrgParser m Char
-newline =
- P.newline
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
-blanklines =
- P.blanklines
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Succeeds when we're in list context.
-inList :: Monad m => OrgParser m ()
-inList = do
- ctx <- orgStateParserContext <$> getState
- guard (ctx == ListItemState)
-
--- | Parse in different context
-withContext :: Monad m
- => ParserContext -- ^ New parser context
- -> OrgParser m a -- ^ Parser to run in that context
- -> OrgParser m a
-withContext context parser = do
- oldContext <- orgStateParserContext <$> getState
- updateState $ \s -> s{ orgStateParserContext = context }
- result <- parser
- updateState $ \s -> s{ orgStateParserContext = oldContext }
- return result
-
---
--- Parser state functions
---
-
--- | Get an export setting.
-getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
-getExportSetting s = s . orgStateExportSettings <$> getState
-
--- | Set the current position as the last position at which a forbidden char
--- was found (i.e. a character which is not allowed at the inner border of
--- markup).
-updateLastForbiddenCharPos :: Monad m => OrgParser m ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
--- | Set the current parser position as the position at which a character was
--- seen which allows inline markup to follow.
-updateLastPreCharPos :: Monad m => OrgParser m ()
-updateLastPreCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
---
--- Org key-value parsing
---
-
--- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
-orgArgKey = try $
- skipSpaces *> char ':'
- *> many1 orgArgWordChar
-
--- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
-
--- | Chars treated as part of a word in plists.
-orgArgWordChar :: Monad m => OrgParser m Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
deleted file mode 100644
index 8c87cfa25..000000000
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Utility functions used in other Pandoc Org modules.
--}
-module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
- , isImageFilename
- , rundocBlockClass
- , toRundocAttrib
- , translateLang
- ) where
-
-import Control.Arrow ( first )
-import Data.Char ( isAlphaNum )
-import Data.List ( isPrefixOf, isSuffixOf )
-
-
--- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename filename =
- any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
- ':' `notElem` filename)
- where
- imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
- protocols = [ "file", "http", "https" ]
-
--- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
- _ -> Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
--- | Prefix the name of a attribute, marking it as a code execution parameter.
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first (rundocPrefix ++)
-
--- | Translate from Org-mode's programming language identifiers to those used
--- by Pandoc. This is useful to allow for proper syntax highlighting in
--- Pandoc output.
-translateLang :: String -> String
-translateLang cs =
- case cs of
- "C" -> "c"
- "C++" -> "cpp"
- "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported
- "js" -> "javascript"
- "lisp" -> "commonlisp"
- "R" -> "r"
- "sh" -> "bash"
- "sqlite" -> "sql"
- _ -> cs
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
deleted file mode 100644
index 441c573d9..000000000
--- a/src/Text/Pandoc/Readers/RST.hs
+++ /dev/null
@@ -1,1354 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion from reStructuredText to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.RST ( readRST ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder (setMeta, fromList)
-import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Error
-import Control.Monad ( when, liftM, guard, mzero )
-import Data.List ( findIndex, intercalate, isInfixOf,
- transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
-import Data.Maybe (fromMaybe, isJust)
-import qualified Data.Map as M
-import Text.Printf ( printf )
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import qualified Text.Pandoc.Builder as B
-import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit, isSpace, toUpper)
-import Data.Monoid ((<>))
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
-
--- TODO:
--- [ ] .. parsed-literal
--- [ ] :widths: attribute in .. table
--- [ ] .. csv-table
--- [ ] .. list-table
-
--- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readRST opts s = do
- parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n")
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-type RSTParser m = ParserT [Char] ParserState m
-
---
--- Constants and data structure definitions
----
-
-bulletListMarkers :: [Char]
-bulletListMarkers = "*+-"
-
-underlineChars :: [Char]
-underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221"
-
---
--- parsing documents
---
-
-isHeader :: Int -> Block -> Bool
-isHeader n (Header x _ _) = x == n
-isHeader _ _ = False
-
--- | Promote all headers in a list of blocks. (Part of
--- title transformation for RST.)
-promoteHeaders :: Int -> [Block] -> [Block]
-promoteHeaders num ((Header level attr text):rest) =
- (Header (level - num) attr text):(promoteHeaders num rest)
-promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
-promoteHeaders _ [] = []
-
--- | If list of blocks starts with a header (or a header and subheader)
--- of level that are not found elsewhere, return it as a title and
--- promote all the other headers. Also process a definition list right
--- after the title block as metadata.
-titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
- -> ([Block], Meta) -- ^ modified list of blocks, metadata
-titleTransform (bs, meta) =
- let (bs', meta') =
- case bs of
- ((Header 1 _ head1):(Header 2 _ head2):rest)
- | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
- (promoteHeaders 2 rest, setMeta "title" (fromList head1) $
- setMeta "subtitle" (fromList head2) meta)
- ((Header 1 _ head1):rest)
- | not (any (isHeader 1) rest) -> -- title only
- (promoteHeaders 1 rest,
- setMeta "title" (fromList head1) meta)
- _ -> (bs, meta)
- in case bs' of
- (DefinitionList ds : rest) ->
- (rest, metaFromDefList ds meta')
- _ -> (bs', meta')
-
-metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
-metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
- where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
- adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
- $ M.adjust toPlain "date"
- $ M.adjust toPlain "title"
- $ M.mapKeys (\k -> if k == "authors" then "author" else k)
- $ metamap
- toPlain (MetaBlocks [Para xs]) = MetaInlines xs
- toPlain x = x
- splitAuthors (MetaBlocks [Para xs])
- = MetaList $ map MetaInlines
- $ splitAuthors' xs
- splitAuthors x = x
- splitAuthors' = map normalizeSpaces .
- splitOnSemi . concatMap factorSemi
- splitOnSemi = splitBy (==Str ";")
- factorSemi (Str []) = []
- factorSemi (Str s) = case break (==';') s of
- (xs,[]) -> [Str xs]
- (xs,';':ys) -> Str xs : Str ";" :
- factorSemi (Str ys)
- (xs,ys) -> Str xs :
- factorSemi (Str ys)
- factorSemi x = [x]
-
-parseRST :: PandocMonad m => RSTParser m Pandoc
-parseRST = do
- optional blanklines -- skip blank lines at beginning of file
- startPos <- getPosition
- -- go through once just to get list of reference keys and notes
- -- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- concat <$>
- manyTill (referenceKey <|> noteBlock <|> lineClump) eof
- setInput docMinusKeys
- setPosition startPos
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- -- now parse it for real...
- blocks <- B.toList <$> parseBlocks
- standalone <- getOption readerStandalone
- state <- getState
- let meta = stateMeta state
- let (blocks', meta') = if standalone
- then titleTransform (blocks, meta)
- else (blocks, meta)
- reportLogMessages
- return $ Pandoc meta' blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: PandocMonad m => RSTParser m Blocks
-parseBlocks = mconcat <$> manyTill block eof
-
-block :: PandocMonad m => RSTParser m Blocks
-block = choice [ codeBlock
- , blockQuote
- , fieldList
- , include
- , directive
- , comment
- , header
- , hrule
- , lineBlock -- must go before definitionList
- , table
- , list
- , lhsCodeBlock
- , para
- , mempty <$ blanklines
- ] <?> "block"
-
---
--- field list
---
-
-rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
-rawFieldListItem minIndent = try $ do
- indent <- length <$> many (char ' ')
- guard $ indent >= minIndent
- char ':'
- name <- many1Till (noneOf "\n") (char ':')
- (() <$ lookAhead newline) <|> skipMany1 spaceChar
- first <- anyLine
- rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
- indentedBlock
- let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
- return (name, raw)
-
-fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
-fieldListItem minIndent = try $ do
- (name, raw) <- rawFieldListItem minIndent
- term <- parseInlineFromString name
- contents <- parseFromString parseBlocks raw
- optional blanklines
- return (term, [contents])
-
-fieldList :: PandocMonad m => RSTParser m Blocks
-fieldList = try $ do
- indent <- length <$> lookAhead (many spaceChar)
- items <- many1 $ fieldListItem indent
- case items of
- [] -> return mempty
- items' -> return $ B.definitionList items'
-
---
--- line block
---
-
-lineBlock :: PandocMonad m => RSTParser m Blocks
-lineBlock = try $ do
- lines' <- lineBlockLines
- lines'' <- mapM parseInlineFromString lines'
- return $ B.lineBlock lines''
-
-lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
-lineBlockDirective body = do
- lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
- return $ B.lineBlock lines'
-
---
--- paragraph block
---
-
--- note: paragraph can end in a :: starting a code block
-para :: PandocMonad m => RSTParser m Blocks
-para = try $ do
- result <- trimInlines . mconcat <$> many1 inline
- option (B.plain result) $ try $ do
- newline
- blanklines
- case viewr (B.unMany result) of
- ys :> (Str xs) | "::" `isSuffixOf` xs -> do
- raw <- option mempty codeBlockBody
- return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
- <> raw
- _ -> return (B.para result)
-
-plain :: PandocMonad m => RSTParser m Blocks
-plain = B.plain . trimInlines . mconcat <$> many1 inline
-
---
--- header blocks
---
-
-header :: PandocMonad m => RSTParser m Blocks
-header = doubleHeader <|> singleHeader <?> "header"
-
--- a header with lines on top and bottom
-doubleHeader :: PandocMonad m => RSTParser m Blocks
-doubleHeader = try $ do
- c <- oneOf underlineChars
- rest <- many (char c) -- the top line
- let lenTop = length (c:rest)
- skipSpaces
- newline
- txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- if (len > lenTop) then fail "title longer than border" else return ()
- blankline -- spaces and newline
- count lenTop (char c) -- the bottom line
- blanklines
- -- check to see if we've had this kind of header before.
- -- if so, get appropriate level. if not, add to list.
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- attr <- registerHeader nullAttr txt
- return $ B.headerWith attr level txt
-
--- a header with line on the bottom only
-singleHeader :: PandocMonad m => RSTParser m Blocks
-singleHeader = try $ do
- notFollowedBy' whitespace
- txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
- pos <- getPosition
- let len = (sourceColumn pos) - 1
- blankline
- c <- oneOf underlineChars
- count (len - 1) (char c)
- many (char c)
- blanklines
- state <- getState
- let headerTable = stateHeaderTable state
- let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
- Just ind -> (headerTable, ind + 1)
- Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
- setState (state { stateHeaderTable = headerTable' })
- attr <- registerHeader nullAttr txt
- return $ B.headerWith attr level txt
-
---
--- hrule block
---
-
-hrule :: Monad m => ParserT [Char] st m Blocks
-hrule = try $ do
- chr <- oneOf underlineChars
- count 3 (char chr)
- skipMany (char chr)
- blankline
- blanklines
- return B.horizontalRule
-
---
--- code blocks
---
-
--- read a line indented by a given string
-indentedLine :: Monad m => String -> ParserT [Char] st m [Char]
-indentedLine indents = try $ do
- string indents
- anyLine
-
--- one or more indented lines, possibly separated by blank lines.
--- any amount of indentation will work.
-indentedBlock :: Monad m => ParserT [Char] st m [Char]
-indentedBlock = try $ do
- indents <- lookAhead $ many1 spaceChar
- lns <- many1 $ try $ do b <- option "" blanklines
- l <- indentedLine indents
- return (b ++ l)
- optional blanklines
- return $ unlines lns
-
-quotedBlock :: Monad m => ParserT [Char] st m [Char]
-quotedBlock = try $ do
- quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
- lns <- many1 $ lookAhead (char quote) >> anyLine
- optional blanklines
- return $ unlines lns
-
-codeBlockStart :: Monad m => ParserT [Char] st m Char
-codeBlockStart = string "::" >> blankline >> blankline
-
-codeBlock :: Monad m => ParserT [Char] st m Blocks
-codeBlock = try $ codeBlockStart >> codeBlockBody
-
-codeBlockBody :: Monad m => ParserT [Char] st m Blocks
-codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
- (indentedBlock <|> quotedBlock)
-
-lhsCodeBlock :: Monad m => RSTParser m Blocks
-lhsCodeBlock = try $ do
- getPosition >>= guard . (==1) . sourceColumn
- guardEnabled Ext_literate_haskell
- optional codeBlockStart
- lns <- latexCodeBlock <|> birdCodeBlock
- blanklines
- return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
- $ intercalate "\n" lns
-
-latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
-latexCodeBlock = try $ do
- try (latexBlockLine "\\begin{code}")
- many1Till anyLine (try $ latexBlockLine "\\end{code}")
- where
- latexBlockLine s = skipMany spaceChar >> string s >> blankline
-
-birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
-birdCodeBlock = filterSpace <$> many1 birdTrackLine
- where filterSpace lns =
- -- if (as is normal) there is always a space after >, drop it
- if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
- else lns
-
-birdTrackLine :: Monad m => ParserT [Char] st m [Char]
-birdTrackLine = char '>' >> anyLine
-
---
--- block quotes
---
-
-blockQuote :: PandocMonad m => RSTParser m Blocks
-blockQuote = do
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
- return $ B.blockQuote contents
-
-{-
-Unsupported options for include:
-tab-width
-encoding
--}
-
-include :: PandocMonad m => RSTParser m Blocks
-include = try $ do
- string ".. include::"
- skipMany spaceChar
- f <- trim <$> anyLine
- fields <- many $ rawFieldListItem 3
- -- options
- let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
- let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
- guard $ not (null f)
- oldPos <- getPosition
- oldInput <- getInput
- containers <- stateContainers <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
- updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbContents <- readFileFromDirs ["."] f
- contentLines <- case mbContents of
- Just s -> return $ lines s
- Nothing -> do
- logMessage $ CouldNotLoadIncludeFile f oldPos
- return []
- let numLines = length contentLines
- let startLine' = case startLine of
- Nothing -> 1
- Just x | x >= 0 -> x
- | otherwise -> numLines + x -- negative from end
- let endLine' = case endLine of
- Nothing -> numLines + 1
- Just x | x >= 0 -> x
- | otherwise -> numLines + x -- negative from end
- let contentLines' = drop (startLine' - 1)
- $ take (endLine' - 1)
- $ contentLines
- let contentLines'' = (case trim <$> lookup "end-before" fields of
- Just patt -> takeWhile (not . (patt `isInfixOf`))
- Nothing -> id) .
- (case trim <$> lookup "start-after" fields of
- Just patt -> drop 1 .
- dropWhile (not . (patt `isInfixOf`))
- Nothing -> id) $ contentLines'
- let contents' = unlines contentLines''
- case lookup "code" fields of
- Just lang -> do
- let numberLines = lookup "number-lines" fields
- let classes = trimr lang : ["numberLines" | isJust numberLines] ++
- maybe [] words (lookup "class" fields)
- let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines
- let ident = maybe "" trimr $ lookup "name" fields
- let attribs = (ident, classes, kvs)
- return $ B.codeBlockWith attribs contents'
- Nothing -> case lookup "literal" fields of
- Just _ -> return $ B.rawBlock "rst" contents'
- Nothing -> do
- setPosition $ newPos f 1 1
- setInput contents'
- bs <- optional blanklines >>
- (mconcat <$> many block)
- setInput oldInput
- setPosition oldPos
- updateState $ \s -> s{ stateContainers =
- tail $ stateContainers s }
- return bs
-
-
---
--- list blocks
---
-
-list :: PandocMonad m => RSTParser m Blocks
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks])
-definitionListItem = try $ do
- -- avoid capturing a directive or comment
- notFollowedBy (try $ char '.' >> char '.')
- term <- trimInlines . mconcat <$> many1Till inline endline
- raw <- indentedBlock
- -- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n"
- return (term, [contents])
-
-definitionList :: PandocMonad m => RSTParser m Blocks
-definitionList = B.definitionList <$> many1 definitionListItem
-
--- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT [Char] st m Int
-bulletListStart = try $ do
- notFollowedBy' hrule -- because hrules start out just like lists
- marker <- oneOf bulletListMarkers
- white <- many1 spaceChar
- return $ length (marker:white)
-
--- parses ordered list start and returns its length (inc following whitespace)
-orderedListStart :: Monad m => ListNumberStyle
- -> ListNumberDelim
- -> RSTParser m Int
-orderedListStart style delim = try $ do
- (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
- white <- many1 spaceChar
- return $ markerLen + length white
-
--- parse a line of a list item
-listLine :: Monad m => Int -> RSTParser m [Char]
-listLine markerLength = try $ do
- notFollowedBy blankline
- indentWith markerLength
- line <- anyLine
- return $ line ++ "\n"
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Monad m => Int -> RSTParser m [Char]
-indentWith num = do
- tabStop <- getOption readerTabStop
- if (num < tabStop)
- then count num (char ' ')
- else choice [ try (count num (char ' ')),
- (try (char '\t' >> count (num - tabStop) (char ' '))) ]
-
--- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: Monad m => RSTParser m Int
- -> RSTParser m (Int, [Char])
-rawListItem start = try $ do
- markerLength <- start
- firstLine <- anyLine
- restLines <- many (listLine markerLength)
- return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
-
--- continuation of a list item - indented and separated by blankline or
--- (in compact lists) endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Monad m => Int -> RSTParser m [Char]
-listContinuation markerLength = try $ do
- blanks <- many1 blankline
- result <- many1 (listLine markerLength)
- return $ blanks ++ concat result
-
-listItem :: PandocMonad m
- => RSTParser m Int
- -> RSTParser m Blocks
-listItem start = try $ do
- (markerLength, first) <- rawListItem start
- rest <- many (listContinuation markerLength)
- skipMany1 blankline <|> () <$ lookAhead start
- -- parsing with ListItemState forces markers at beginning of lines to
- -- count as list item markers, even if not separated by blank space.
- -- see definition of "endline"
- state <- getState
- let oldContext = stateParserContext state
- setState $ state {stateParserContext = ListItemState}
- -- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n"
- updateState (\st -> st {stateParserContext = oldContext})
- return $ case B.toList parsed of
- [Para xs] -> B.singleton $ Plain xs
- [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys]
- [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys]
- [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
- _ -> parsed
-
-orderedList :: PandocMonad m => RSTParser m Blocks
-orderedList = try $ do
- (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
- items <- many1 (listItem (orderedListStart style delim))
- let items' = compactify items
- return $ B.orderedListWith (start, style, delim) items'
-
-bulletList :: PandocMonad m => RSTParser m Blocks
-bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart)
-
---
--- directive (e.g. comment, container, compound-paragraph)
---
-
-comment :: Monad m => RSTParser m Blocks
-comment = try $ do
- string ".."
- skipMany1 spaceChar <|> (() <$ lookAhead newline)
- notFollowedBy' directiveLabel
- manyTill anyChar blanklines
- optional indentedBlock
- return mempty
-
-directiveLabel :: Monad m => RSTParser m String
-directiveLabel = map toLower
- <$> many1Till (letter <|> char '-') (try $ string "::")
-
-directive :: PandocMonad m => RSTParser m Blocks
-directive = try $ do
- string ".."
- directive'
-
-directive' :: PandocMonad m => RSTParser m Blocks
-directive' = do
- skipMany1 spaceChar
- label <- directiveLabel
- skipMany spaceChar
- top <- many $ satisfy (/='\n')
- <|> try (char '\n' <*
- notFollowedBy' (rawFieldListItem 3) <*
- count 3 (char ' ') <*
- notFollowedBy blankline)
- newline
- fields <- many $ rawFieldListItem 3
- body <- option "" $ try $ blanklines >> indentedBlock
- optional blanklines
- let body' = body ++ "\n\n"
- imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height")
- where
- classes = words $ maybe "" trim $ lookup cl fields
- getAtt k = case lookup k fields of
- Just v -> [(k, filter (not . isSpace) v)]
- Nothing -> []
- case label of
- "table" -> tableDirective top fields body'
- "line-block" -> lineBlockDirective body'
- "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
- "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
- "container" -> parseFromString parseBlocks body'
- "replace" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim top)
- "unicode" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim $ unicodeTransform top)
- "compound" -> parseFromString parseBlocks body'
- "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
- "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
- "highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
- "rubric" -> B.para . B.strong <$> parseInlineFromString top
- _ | label `elem` ["attention","caution","danger","error","hint",
- "important","note","tip","warning","admonition"] ->
- do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
- let lab = case label of
- "admonition" -> mempty
- (l:ls) -> B.divWith ("",["admonition-title"],[])
- (B.para (B.str (toUpper l : ls)))
- [] -> mempty
- return $ B.divWith ("",[label],[]) (lab <> bod)
- "sidebar" ->
- do let subtit = maybe "" trim $ lookup "subtitle" fields
- tit <- B.para . B.strong <$> parseInlineFromString
- (trim top ++ if null subtit
- then ""
- else (": " ++ subtit))
- bod <- parseFromString parseBlocks body'
- return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
- "topic" ->
- do tit <- B.para . B.strong <$> parseInlineFromString top
- bod <- parseFromString parseBlocks body'
- return $ B.divWith ("",["topic"],[]) $ tit <> bod
- "default-role" -> mempty <$ updateState (\s ->
- s { stateRstDefaultRole =
- case trim top of
- "" -> stateRstDefaultRole def
- role -> role })
- x | x == "code" || x == "code-block" ->
- codeblock (words $ fromMaybe [] $ lookup "class" fields)
- (lookup "number-lines" fields) (trim top) body
- "aafig" -> do
- let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields)
- return $ B.codeBlockWith attribs $ stripTrailingNewlines body
- "math" -> return $ B.para $ mconcat $ map B.displayMath
- $ toChunks $ top ++ "\n\n" ++ body
- "figure" -> do
- (caption, legend) <- parseFromString extractCaption body'
- let src = escapeURI $ trim top
- return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend
- "image" -> do
- let src = escapeURI $ trim top
- let alt = B.str $ maybe "image" trim $ lookup "alt" fields
- let attr = imgAttr "class"
- return $ B.para
- $ case lookup "target" fields of
- Just t -> B.link (escapeURI $ trim t) ""
- $ B.imageWith attr src "" alt
- Nothing -> B.imageWith attr src "" alt
- "class" -> do
- let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
- -- directive content or the first immediately following element
- children <- case body of
- "" -> block
- _ -> parseFromString parseBlocks body'
- return $ B.divWith attrs children
- other -> do
- pos <- getPosition
- logMessage $ SkippedContent (".. " ++ other) pos
- return mempty
-
-tableDirective :: PandocMonad m
- => String -> [(String, String)] -> String -> RSTParser m Blocks
-tableDirective top _fields body = do
- bs <- parseFromString parseBlocks body
- case B.toList bs of
- [Table _ aligns' widths' header' rows'] -> do
- title <- parseFromString (trimInlines . mconcat <$> many inline) top
- -- TODO widths
- -- align is not applicable since we can't represent whole table align
- return $ B.singleton $ Table (B.toList title)
- aligns' widths' header' rows'
- _ -> return mempty
-
--- TODO:
--- - Only supports :format: fields with a single format for :raw: roles,
--- change Text.Pandoc.Definition.Format to fix
-addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks
-addNewRole roleString fields = do
- pos <- getPosition
- (role, parentRole) <- parseFromString inheritedRole roleString
- customRoles <- stateRstCustomRoles <$> getState
- let getBaseRole (r, f, a) roles =
- case M.lookup r roles of
- Just (r', f', a') -> getBaseRole (r', f', a') roles
- Nothing -> (r, f, a)
- (baseRole, baseFmt, baseAttr) =
- getBaseRole (parentRole, Nothing, nullAttr) customRoles
- fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
- annotate :: [String] -> [String]
- annotate = maybe id (:) $
- if baseRole == "code"
- then lookup "language" fields
- else Nothing
- attr = let (ident, classes, keyValues) = baseAttr
- -- nub in case role name & language class are the same
- in (ident, nub . (role :) . annotate $ classes, keyValues)
-
- -- warn about syntax we ignore
- flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (baseRole /= "code") $ logMessage $
- SkippedContent ":language: [because parent of role is not :code:]"
- pos
- "format" -> when (baseRole /= "raw") $ logMessage $
- SkippedContent ":format: [because parent of role is not :raw:]" pos
- _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
- when (parentRole == "raw" && countKeys "format" > 1) $
- logMessage $ SkippedContent ":format: [after first in definition of role]"
- pos
- when (parentRole == "code" && countKeys "language" > 1) $
- logMessage $ SkippedContent
- ":language: [after first in definition of role]" pos
-
- updateState $ \s -> s {
- stateRstCustomRoles =
- M.insert role (baseRole, fmt, attr) customRoles
- }
-
- return mempty
- where
- countKeys k = length . filter (== k) . map fst $ fields
- inheritedRole =
- (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
-
-
--- Can contain character codes as decimal numbers or
--- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
--- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
--- or text, which is used as-is. Comments start with ..
-unicodeTransform :: String -> String
-unicodeTransform t =
- case t of
- ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment
- ('0':'x':xs) -> go "0x" xs
- ('x':xs) -> go "x" xs
- ('\\':'x':xs) -> go "\\x" xs
- ('U':'+':xs) -> go "U+" xs
- ('u':xs) -> go "u" xs
- ('\\':'u':xs) -> go "\\u" xs
- ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs)
- -- drop semicolon
- (\(c,s) -> c : unicodeTransform (drop 1 s))
- $ extractUnicodeChar xs
- (x:xs) -> x : unicodeTransform xs
- [] -> []
- where go pref zs = maybe (pref ++ unicodeTransform zs)
- (\(c,s) -> c : unicodeTransform s)
- $ extractUnicodeChar zs
-
-extractUnicodeChar :: String -> Maybe (Char, String)
-extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
- where (ds,rest) = span isHexDigit s
- mbc = safeRead ('\'':'\\':'x':ds ++ "'")
-
-extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
-extractCaption = do
- capt <- trimInlines . mconcat <$> many inline
- legend <- optional blanklines >> (mconcat <$> many block)
- return (capt,legend)
-
--- divide string by blanklines
-toChunks :: String -> [String]
-toChunks = dropWhile null
- . map (trim . unlines)
- . splitBy (all (`elem` (" \t" :: String))) . lines
-
-codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
-codeblock classes numberLines lang body =
- return $ B.codeBlockWith attribs $ stripTrailingNewlines body
- where attribs = ("", classes', kvs)
- classes' = "sourceCode" : lang
- : maybe [] (\_ -> ["numberLines"]) numberLines
- ++ classes
- kvs = case numberLines of
- Just "" -> []
- Nothing -> []
- Just n -> [("startFrom",trim n)]
-
----
---- note block
----
-
-noteBlock :: Monad m => RSTParser m [Char]
-noteBlock = try $ do
- startPos <- getPosition
- string ".."
- spaceChar >> skipMany spaceChar
- ref <- noteMarker
- first <- (spaceChar >> skipMany spaceChar >> anyLine)
- <|> (newline >> return "")
- blanks <- option "" blanklines
- rest <- option "" indentedBlock
- endPos <- getPosition
- let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
- let newnote = (ref, raw)
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-noteMarker :: Monad m => RSTParser m [Char]
-noteMarker = do
- char '['
- res <- many1 digit
- <|> (try $ char '#' >> liftM ('#':) simpleReferenceName')
- <|> count 1 (oneOf "#*")
- char ']'
- return res
-
---
--- reference key
---
-
-quotedReferenceName :: PandocMonad m => RSTParser m Inlines
-quotedReferenceName = try $ do
- char '`' >> notFollowedBy (char '`') -- `` means inline code!
- label' <- trimInlines . mconcat <$> many1Till inline (char '`')
- return label'
-
-unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
-unquotedReferenceName = try $ do
- label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
- return label'
-
--- Simple reference names are single words consisting of alphanumerics
--- plus isolated (no two adjacent) internal hyphens, underscores,
--- periods, colons and plus signs; no whitespace or other characters
--- are allowed.
-simpleReferenceName' :: Monad m => ParserT [Char] st m String
-simpleReferenceName' = do
- x <- alphaNum
- xs <- many $ alphaNum
- <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
- return (x:xs)
-
-simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
-simpleReferenceName = do
- raw <- simpleReferenceName'
- return $ B.str raw
-
-referenceName :: PandocMonad m => RSTParser m Inlines
-referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName <* lookAhead (char ':')) <|>
- unquotedReferenceName
-
-referenceKey :: PandocMonad m => RSTParser m [Char]
-referenceKey = do
- startPos <- getPosition
- choice [substKey, anonymousKey, regularKey]
- optional blanklines
- endPos <- getPosition
- -- return enough blanks to replace key
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
-targetURI :: Monad m => ParserT [Char] st m [Char]
-targetURI = do
- skipSpaces
- optional newline
- contents <- many1 (try (many spaceChar >> newline >>
- many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
- blanklines
- return $ escapeURI $ trim $ contents
-
-substKey :: PandocMonad m => RSTParser m ()
-substKey = try $ do
- string ".."
- skipMany1 spaceChar
- (alt,ref) <- withRaw $ trimInlines . mconcat
- <$> enclosed (char '|') (char '|') inline
- res <- B.toList <$> directive'
- il <- case res of
- -- use alt unless :alt: attribute on image:
- [Para [Image attr [Str "image"] (src,tit)]] ->
- return $ B.imageWith attr src tit alt
- [Para [Link _ [Image attr [Str "image"] (src,tit)] (src',tit')]] ->
- return $ B.link src' tit' (B.imageWith attr src tit alt)
- [Para ils] -> return $ B.fromList ils
- _ -> mzero
- let key = toKey $ stripFirstAndLast ref
- updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
-
-anonymousKey :: Monad m => RSTParser m ()
-anonymousKey = try $ do
- oneOfStrings [".. __:", "__"]
- src <- targetURI
- pos <- getPosition
- let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
-
-stripTicks :: String -> String
-stripTicks = reverse . stripTick . reverse . stripTick
- where stripTick ('`':xs) = xs
- stripTick xs = xs
-
-regularKey :: PandocMonad m => RSTParser m ()
-regularKey = try $ do
- string ".. _"
- (_,ref) <- withRaw referenceName
- char ':'
- src <- targetURI
- let key = toKey $ stripTicks ref
- --TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
-
---
--- tables
---
-
--- General tables TODO:
--- - figure out if leading spaces are acceptable and if so, add
--- support for them
---
--- Simple tables TODO:
--- - column spans
--- - multiline support
--- - ensure that rightmost column span does not need to reach end
--- - require at least 2 columns
---
--- Grid tables TODO:
--- - column spans
-
-dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
-dashedLine ch = do
- dashes <- many1 (char ch)
- sp <- many (char ' ')
- return (length dashes, length $ dashes ++ sp)
-
-simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
-simpleDashedLines ch = try $ many1 (dashedLine ch)
-
--- Parse a table row separator
-simpleTableSep :: Monad m => Char -> RSTParser m Char
-simpleTableSep ch = try $ simpleDashedLines ch >> newline
-
--- Parse a table footer
-simpleTableFooter :: Monad m => RSTParser m [Char]
-simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-
--- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
-simpleTableRawLine indices = do
- line <- many1Till anyChar newline
- return (simpleTableSplitLine indices line)
-
--- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
-simpleTableRow indices = do
- notFollowedBy' simpleTableFooter
- firstLine <- simpleTableRawLine indices
- colLines <- return [] -- TODO
- let cols = map unlines . transpose $ firstLine : colLines
- mapM (parseFromString (mconcat <$> many plain)) cols
-
-simpleTableSplitLine :: [Int] -> String -> [String]
-simpleTableSplitLine indices line =
- map trim
- $ tail $ splitByIndices (init indices) line
-
-simpleTableHeader :: PandocMonad m
- => Bool -- ^ Headerless table
- -> RSTParser m ([Blocks], [Alignment], [Int])
-simpleTableHeader headless = try $ do
- optional blanklines
- rawContent <- if headless
- then return ""
- else simpleTableSep '=' >> anyLine
- dashes <- simpleDashedLines '=' <|> simpleDashedLines '-'
- newline
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault
- let rawHeads = if headless
- then replicate (length dashes) ""
- else simpleTableSplitLine indices rawContent
- heads <- mapM (parseFromString (mconcat <$> many plain)) $
- map trim rawHeads
- return (heads, aligns, indices)
-
--- Parse a simple table.
-simpleTable :: PandocMonad m
- => Bool -- ^ Headerless table
- -> RSTParser m Blocks
-simpleTable headless = do
- tbl <- tableWith (simpleTableHeader headless) simpleTableRow
- sep simpleTableFooter
- -- Simple tables get 0s for relative column widths (i.e., use default)
- case B.toList tbl of
- [Table c a _w h l] -> return $ B.singleton $
- Table c a (replicate (length a) 0) h l
- _ ->
- throwError $ PandocShouldNeverHappenError
- "tableWith returned something unexpected"
- where
- sep = return () -- optional (simpleTableSep '-')
-
-gridTable :: PandocMonad m
- => Bool -- ^ Headerless table
- -> RSTParser m Blocks
-gridTable headerless = gridTableWith parseBlocks headerless
-
-table :: PandocMonad m => RSTParser m Blocks
-table = gridTable False <|> simpleTable False <|>
- gridTable True <|> simpleTable True <?> "table"
-
---
--- inline
---
-
-inline :: PandocMonad m => RSTParser m Inlines
-inline = choice [ note -- can start with whitespace, so try before ws
- , whitespace
- , link
- , str
- , endline
- , strong
- , emph
- , code
- , subst
- , interpretedRole
- , smart
- , hyphens
- , escapedChar
- , symbol ] <?> "inline"
-
-parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
-parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
-
-hyphens :: Monad m => RSTParser m Inlines
-hyphens = do
- result <- many1 (char '-')
- optional endline
- -- don't want to treat endline after hyphen or dash as a space
- return $ B.str result
-
-escapedChar :: Monad m => ParserT [Char] st m Inlines
-escapedChar = do c <- escaped anyChar
- return $ if c == ' ' -- '\ ' is null in RST
- then mempty
- else B.str [c]
-
-symbol :: Monad m => RSTParser m Inlines
-symbol = do
- result <- oneOf specialChars
- return $ B.str [result]
-
--- parses inline code, between codeStart and codeEnd
-code :: Monad m => RSTParser m Inlines
-code = try $ do
- string "``"
- result <- manyTill anyChar (try (string "``"))
- return $ B.code
- $ trim $ unwords $ lines result
-
--- succeeds only if we're not right after a str (ie. in middle of word)
-atStart :: Monad m => RSTParser m a -> RSTParser m a
-atStart p = do
- pos <- getPosition
- st <- getState
- -- single quote start can't be right after str
- guard $ stateLastStrPos st /= Just pos
- p
-
-emph :: PandocMonad m => RSTParser m Inlines
-emph = B.emph . trimInlines . mconcat <$>
- enclosed (atStart $ char '*') (char '*') inline
-
-strong :: PandocMonad m => RSTParser m Inlines
-strong = B.strong . trimInlines . mconcat <$>
- enclosed (atStart $ string "**") (try $ string "**") inline
-
--- Note, this doesn't precisely implement the complex rule in
--- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
--- but it should be good enough for most purposes
---
--- TODO:
--- - Classes are silently discarded in addNewRole
--- - Lacks sensible implementation for title-reference (which is the default)
--- - Allows direct use of the :raw: role, rST only allows inherited use.
-interpretedRole :: PandocMonad m => RSTParser m Inlines
-interpretedRole = try $ do
- (role, contents) <- roleBefore <|> roleAfter
- renderRole contents Nothing role nullAttr
-
-renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
-renderRole contents fmt role attr = case role of
- "sup" -> return $ B.superscript $ B.str contents
- "superscript" -> return $ B.superscript $ B.str contents
- "sub" -> return $ B.subscript $ B.str contents
- "subscript" -> return $ B.subscript $ B.str contents
- "emphasis" -> return $ B.emph $ B.str contents
- "strong" -> return $ B.strong $ B.str contents
- "rfc-reference" -> return $ rfcLink contents
- "RFC" -> return $ rfcLink contents
- "pep-reference" -> return $ pepLink contents
- "PEP" -> return $ pepLink contents
- "literal" -> return $ B.codeWith attr contents
- "math" -> return $ B.math contents
- "title-reference" -> titleRef contents
- "title" -> titleRef contents
- "t" -> titleRef contents
- "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
- "span" -> return $ B.spanWith attr $ B.str contents
- "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
- custom -> do
- customRoles <- stateRstCustomRoles <$> getState
- case M.lookup custom customRoles of
- Just (newRole, newFmt, newAttr) ->
- renderRole contents newFmt newRole newAttr
- Nothing -> do
- pos <- getPosition
- logMessage $ SkippedContent (":" ++ custom ++ ":") pos
- return $ B.str contents -- Undefined role
- where
- titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
- rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
- where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
- pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
- where padNo = replicate (4 - length pepNo) '0' ++ pepNo
- pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
-
-addClass :: String -> Attr -> Attr
-addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
-
-roleName :: PandocMonad m => RSTParser m String
-roleName = many1 (letter <|> char '-')
-
-roleMarker :: PandocMonad m => RSTParser m String
-roleMarker = char ':' *> roleName <* char ':'
-
-roleBefore :: PandocMonad m => RSTParser m (String,String)
-roleBefore = try $ do
- role <- roleMarker
- contents <- unmarkedInterpretedText
- return (role,contents)
-
-roleAfter :: PandocMonad m => RSTParser m (String,String)
-roleAfter = try $ do
- contents <- unmarkedInterpretedText
- role <- roleMarker <|> (stateRstDefaultRole <$> getState)
- return (role,contents)
-
-unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
-unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
-
-whitespace :: PandocMonad m => RSTParser m Inlines
-whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
-
-str :: Monad m => RSTParser m Inlines
-str = do
- let strChar = noneOf ("\t\n " ++ specialChars)
- result <- many1 strChar
- updateLastStrPos
- return $ B.str result
-
--- an endline character that can be treated as a space, not a structural break
-endline :: Monad m => RSTParser m Inlines
-endline = try $ do
- newline
- notFollowedBy blankline
- -- parse potential list-starts at beginning of line differently in a list:
- st <- getState
- if (stateParserContext st) == ListItemState
- then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
- notFollowedBy' bulletListStart
- else return ()
- return B.softbreak
-
---
--- links
---
-
-link :: PandocMonad m => RSTParser m Inlines
-link = choice [explicitLink, referenceLink, autoLink] <?> "link"
-
-explicitLink :: PandocMonad m => RSTParser m Inlines
-explicitLink = try $ do
- char '`'
- notFollowedBy (char '`') -- `` marks start of inline code
- label' <- trimInlines . mconcat <$>
- manyTill (notFollowedBy (char '`') >> inline) (char '<')
- src <- trim <$> manyTill (noneOf ">\n") (char '>')
- skipSpaces
- string "`_"
- optional $ char '_' -- anonymous form
- let label'' = if label' == mempty
- then B.str src
- else label'
- -- `link <google_>` is a reference link to _google!
- ((src',tit),attr) <- case reverse src of
- '_':xs -> lookupKey [] (toKey (reverse xs))
- _ -> return ((src, ""), nullAttr)
- return $ B.linkWith attr (escapeURI src') tit label''
-
-referenceLink :: PandocMonad m => RSTParser m Inlines
-referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
- char '_'
- let isAnonKey (Key ('_':_)) = True
- isAnonKey _ = False
- state <- getState
- let keyTable = stateKeys state
- key <- option (toKey $ stripTicks ref) $
- do char '_'
- let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
- case anonKeys of
- [] -> mzero
- (k:_) -> return k
- ((src,tit), attr) <- lookupKey [] key
- -- if anonymous link, remove key so it won't be used again
- when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
- return $ B.linkWith attr src tit label'
-
--- We keep a list of oldkeys so we can detect lookup loops.
-lookupKey :: PandocMonad m
- => [Key] -> Key -> RSTParser m ((String, String), Attr)
-lookupKey oldkeys key = do
- pos <- getPosition
- state <- getState
- let keyTable = stateKeys state
- case M.lookup key keyTable of
- Nothing -> do
- let Key key' = key
- logMessage $ ReferenceNotFound key' pos
- return (("",""),nullAttr)
- -- check for keys of the form link_, which need to be resolved:
- Just ((u@(_:_),""),_) | last u == '_' -> do
- let rawkey = init u
- let newkey = toKey rawkey
- if newkey `elem` oldkeys
- then do
- logMessage $ CircularReference rawkey pos
- return (("",""),nullAttr)
- else lookupKey (key:oldkeys) newkey
- Just val -> return val
-
-autoURI :: Monad m => RSTParser m Inlines
-autoURI = do
- (orig, src) <- uri
- return $ B.link src "" $ B.str orig
-
-autoEmail :: Monad m => RSTParser m Inlines
-autoEmail = do
- (orig, src) <- emailAddress
- return $ B.link src "" $ B.str orig
-
-autoLink :: PandocMonad m => RSTParser m Inlines
-autoLink = autoURI <|> autoEmail
-
-subst :: PandocMonad m => RSTParser m Inlines
-subst = try $ do
- (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline
- state <- getState
- let substTable = stateSubstitutions state
- let key = toKey $ stripFirstAndLast ref
- case M.lookup key substTable of
- Nothing -> do
- pos <- getPosition
- logMessage $ ReferenceNotFound (show key) pos
- return mempty
- Just target -> return target
-
-note :: PandocMonad m => RSTParser m Inlines
-note = try $ do
- optional whitespace
- ref <- noteMarker
- char '_'
- state <- getState
- let notes = stateNotes state
- case lookup ref notes of
- Nothing -> do
- pos <- getPosition
- logMessage $ ReferenceNotFound ref pos
- return mempty
- Just raw -> do
- -- We temporarily empty the note list while parsing the note,
- -- so that we don't get infinite loops with notes inside notes...
- -- Note references inside other notes are allowed in reST, but
- -- not yet in this implementation.
- updateState $ \st -> st{ stateNotes = [] }
- contents <- parseFromString parseBlocks raw
- let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
- -- delete the note so the next auto-numbered note
- -- doesn't get the same contents:
- then deleteFirstsBy (==) notes [(ref,raw)]
- else notes
- updateState $ \st -> st{ stateNotes = newnotes }
- return $ B.note contents
-
-smart :: PandocMonad m => RSTParser m Inlines
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $
- B.singleQuoted . trimInlines . mconcat <$>
- many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $
- B.doubleQuoted . trimInlines . mconcat <$>
- many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
deleted file mode 100644
index 3b89f2ee9..000000000
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ /dev/null
@@ -1,525 +0,0 @@
-{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
--- RelaxedPolyRec needed for inlinesBetween on GHC < 7
-{-
- Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.TWiki
- Copyright : Copyright (C) 2014 Alexander Sulfrian
- License : GNU GPL, version 2 or above
-
- Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
- Stability : alpha
- Portability : portable
-
-Conversion of twiki text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.TWiki ( readTWiki
- ) where
-
-import Text.Pandoc.Definition
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Options
-import Text.Pandoc.Logging
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
-import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Control.Monad
-import Text.Pandoc.XML (fromEntities)
-import Data.Maybe (fromMaybe)
-import Text.HTML.TagSoup
-import Data.Char (isAlphaNum)
-import qualified Data.Foldable as F
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, report)
-
--- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
- => ReaderOptions
- -> String
- -> m Pandoc
-readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n")
- case res of
- Left e -> throwError e
- Right d -> return d
-
-type TWParser = ParserT [Char] ParserState
-
---
--- utility functions
---
-
-tryMsg :: String -> TWParser m a -> TWParser m a
-tryMsg msg p = try p <?> msg
-
-skip :: TWParser m a -> TWParser m ()
-skip parser = parser >> return ()
-
-nested :: PandocMonad m => TWParser m a -> TWParser m a
-nested p = do
- nestlevel <- stateMaxNestingLevel <$> getState
- guard $ nestlevel > 0
- updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
- res <- p
- updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
- return res
-
-htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
-htmlElement tag = tryMsg tag $ do
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar (endtag <|> endofinput)
- return (htmlAttrToPandoc attr, trim content)
- where
- endtag = skip $ htmlTag (~== TagClose tag)
- endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
- trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
-
-htmlAttrToPandoc :: [Attribute String] -> Attr
-htmlAttrToPandoc attrs = (ident, classes, keyvals)
- where
- ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
- keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-
-parseHtmlContentWithAttrs :: PandocMonad m
- => String -> TWParser m a -> TWParser m (Attr, [a])
-parseHtmlContentWithAttrs tag parser = do
- (attr, content) <- htmlElement tag
- parsedContent <- try $ parseContent content
- return (attr, parsedContent)
- where
- parseContent = parseFromString $ nested $ manyTill parser endOfContent
- endOfContent = try $ skipMany blankline >> skipSpaces >> eof
-
-parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
-parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
-
---
--- main parser
---
-
-parseTWiki :: PandocMonad m => TWParser m Pandoc
-parseTWiki = do
- bs <- mconcat <$> many block
- spaces
- eof
- return $ B.doc bs
-
-
---
--- block parsers
---
-
-block :: PandocMonad m => TWParser m B.Blocks
-block = do
- pos <- getPosition
- res <- mempty <$ skipMany1 blankline
- <|> blockElements
- <|> para
- skipMany blankline
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
- return res
-
-blockElements :: PandocMonad m => TWParser m B.Blocks
-blockElements = choice [ separator
- , header
- , verbatim
- , literal
- , list ""
- , table
- , blockQuote
- , noautolink
- ]
-
-separator :: PandocMonad m => TWParser m B.Blocks
-separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
-
-header :: PandocMonad m => TWParser m B.Blocks
-header = tryMsg "header" $ do
- string "---"
- level <- many1 (char '+') >>= return . length
- guard $ level <= 6
- classes <- option [] $ string "!!" >> return ["unnumbered"]
- skipSpaces
- content <- B.trimInlines . mconcat <$> manyTill inline newline
- attr <- registerHeader ("", classes, []) content
- return $ B.headerWith attr level $ content
-
-verbatim :: PandocMonad m => TWParser m B.Blocks
-verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
- >>= return . (uncurry B.codeBlockWith)
-
-literal :: PandocMonad m => TWParser m B.Blocks
-literal = htmlElement "literal" >>= return . rawBlock
- where
- format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
- rawBlock (attrs, content) = B.rawBlock (format attrs) content
-
-list :: PandocMonad m => String -> TWParser m B.Blocks
-list prefix = choice [ bulletList prefix
- , orderedList prefix
- , definitionList prefix]
-
-definitionList :: PandocMonad m => String -> TWParser m B.Blocks
-definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
- elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
- return $ B.definitionList elements
- where
- parseDefinitionListItem :: PandocMonad m
- => String -> TWParser m (B.Inlines, [B.Blocks])
- parseDefinitionListItem indent = do
- string (indent ++ "$ ") >> skipSpaces
- term <- many1Till inline $ string ": "
- line <- listItemLine indent $ string "$ "
- return $ (mconcat term, [line])
-
-bulletList :: PandocMonad m => String -> TWParser m B.Blocks
-bulletList prefix = tryMsg "bulletList" $
- parseList prefix (char '*') (char ' ')
-
-orderedList :: PandocMonad m => String -> TWParser m B.Blocks
-orderedList prefix = tryMsg "orderedList" $
- parseList prefix (oneOf "1iIaA") (string ". ")
-
-parseList :: PandocMonad m
- => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
-parseList prefix marker delim = do
- (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
- blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
- return $ case style of
- '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
- 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
- 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
- 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
- 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
- _ -> B.bulletList blocks
- where
- listStyle = do
- indent <- many1 $ string " "
- style <- marker
- return (concat indent, style)
-
-parseListItem :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
-
-listItemLine :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
- where
- lineContent = do
- content <- anyLine
- continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
- filterSpaces = reverse . dropWhile (== ' ') . reverse
- listContinuation = notFollowedBy (string prefix >> marker) >>
- string " " >> lineContent
- parseContent = parseFromString $ many1 $ nestedList <|> parseInline
- parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
- return . B.plain . mconcat
- nestedList = list prefix
- lastNewline = try $ char '\n' <* eof
- newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
-
-table :: PandocMonad m => TWParser m B.Blocks
-table = try $ do
- tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
- rows <- many1 tableParseRow
- return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
- where
- buildTable caption rows (aligns, heads)
- = B.table caption aligns heads rows
- align rows = replicate (columCount rows) (AlignDefault, 0)
- columns rows = replicate (columCount rows) mempty
- columCount rows = length $ head rows
-
-tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
-tableParseHeader = try $ do
- char '|'
- leftSpaces <- many spaceChar >>= return . length
- char '*'
- content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
- char '*'
- rightSpaces <- many spaceChar >>= return . length
- optional tableEndOfRow
- return (tableAlign leftSpaces rightSpaces, content)
- where
- tableAlign left right
- | left >= 2 && left == right = (AlignCenter, 0)
- | left > right = (AlignRight, 0)
- | otherwise = (AlignLeft, 0)
-
-tableParseRow :: PandocMonad m => TWParser m [B.Blocks]
-tableParseRow = many1Till tableParseColumn newline
-
-tableParseColumn :: PandocMonad m => TWParser m B.Blocks
-tableParseColumn = char '|' *> skipSpaces *>
- tableColumnContent (skipSpaces >> char '|')
- <* skipSpaces <* optional tableEndOfRow
-
-tableEndOfRow :: PandocMonad m => TWParser m Char
-tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
-
-tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
-tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
- where
- content = continuation <|> inline
- continuation = try $ char '\\' >> newline >> return mempty
-
-blockQuote :: PandocMonad m => TWParser m B.Blocks
-blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
-
-noautolink :: PandocMonad m => TWParser m B.Blocks
-noautolink = do
- (_, content) <- htmlElement "noautolink"
- st <- getState
- setState $ st{ stateAllowLinks = False }
- blocks <- try $ parseContent content
- setState $ st{ stateAllowLinks = True }
- return $ mconcat blocks
- where
- parseContent = parseFromString $ many $ block
-
-para :: PandocMonad m => TWParser m B.Blocks
-para = many1Till inline endOfParaElement >>= return . result . mconcat
- where
- endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
- endOfInput = try $ skipMany blankline >> skipSpaces >> eof
- endOfPara = try $ blankline >> skipMany1 blankline
- newBlockElement = try $ blankline >> skip blockElements
- result content = if F.all (==Space) content
- then mempty
- else B.para $ B.trimInlines content
-
-
---
--- inline parsers
---
-
-inline :: PandocMonad m => TWParser m B.Inlines
-inline = choice [ whitespace
- , br
- , macro
- , strong
- , strongHtml
- , strongAndEmph
- , emph
- , emphHtml
- , boldCode
- , smart
- , link
- , htmlComment
- , code
- , codeHtml
- , nop
- , autoLink
- , str
- , symbol
- ] <?> "inline"
-
-whitespace :: PandocMonad m => TWParser m B.Inlines
-whitespace = (lb <|> regsp) >>= return
- where lb = try $ skipMany spaceChar >> linebreak >> return B.space
- regsp = try $ skipMany1 spaceChar >> return B.space
-
-br :: PandocMonad m => TWParser m B.Inlines
-br = try $ string "%BR%" >> return B.linebreak
-
-linebreak :: PandocMonad m => TWParser m B.Inlines
-linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
- where lastNewline = eof >> return mempty
- innerNewline = return B.space
-
-between :: (Monoid c, PandocMonad m)
- => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c)
- -> TWParser m c
-between start end p =
- mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
-
-enclosed :: (Monoid b, PandocMonad m)
- => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
-enclosed sep p = between sep (try $ sep <* endMarker) p
- where
- endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
- endSpace = (spaceChar <|> newline) >> return B.space
-
-macro :: PandocMonad m => TWParser m B.Inlines
-macro = macroWithParameters <|> withoutParameters
- where
- withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
- emptySpan name = buildSpan name [] mempty
-
-macroWithParameters :: PandocMonad m => TWParser m B.Inlines
-macroWithParameters = try $ do
- char '%'
- name <- macroName
- (content, kvs) <- attributes
- char '%'
- return $ buildSpan name kvs $ B.str content
-
-buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
-buildSpan className kvs = B.spanWith attrs
- where
- attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
- additionalClasses = maybe [] words $ lookup "class" kvs
- kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
-
-macroName :: PandocMonad m => TWParser m String
-macroName = do
- first <- letter
- rest <- many $ alphaNum <|> char '_'
- return (first:rest)
-
-attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
- return . foldr (either mkContent mkKvs) ([], [])
- where
- spnl = skipMany (spaceChar <|> newline)
- mkContent c ([], kvs) = (c, kvs)
- mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
- mkKvs kv (cont, rest) = (cont, (kv : rest))
-
-attribute :: PandocMonad m => TWParser m (Either String (String, String))
-attribute = withKey <|> withoutKey
- where
- withKey = try $ do
- key <- macroName
- char '='
- parseValue False >>= return . (curry Right key)
- withoutKey = try $ parseValue True >>= return . Left
- parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
- withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
- withoutQuotes allowSpaces
- | allowSpaces == True = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
-
-nestedInlines :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m B.Inlines
-nestedInlines end = innerSpace <|> nestedInline
- where
- innerSpace = try $ whitespace <* (notFollowedBy end)
- nestedInline = notFollowedBy whitespace >> nested inline
-
-strong :: PandocMonad m => TWParser m B.Inlines
-strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
-
-strongHtml :: PandocMonad m => TWParser m B.Inlines
-strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
- >>= return . B.strong . mconcat
-
-strongAndEmph :: PandocMonad m => TWParser m B.Inlines
-strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
-
-emph :: PandocMonad m => TWParser m B.Inlines
-emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
-
-emphHtml :: PandocMonad m => TWParser m B.Inlines
-emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
- >>= return . B.emph . mconcat
-
-nestedString :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> (count 1 nonspaceChar)
- where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
-
-boldCode :: PandocMonad m => TWParser m B.Inlines
-boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
-
-htmlComment :: PandocMonad m => TWParser m B.Inlines
-htmlComment = htmlTag isCommentTag >> return mempty
-
-code :: PandocMonad m => TWParser m B.Inlines
-code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
-
-codeHtml :: PandocMonad m => TWParser m B.Inlines
-codeHtml = do
- (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
- return $ B.codeWith attrs $ fromEntities content
-
-autoLink :: PandocMonad m => TWParser m B.Inlines
-autoLink = try $ do
- state <- getState
- guard $ stateAllowLinks state
- (text, url) <- parseLink
- guard $ checkLink (head $ reverse url)
- return $ makeLink (text, url)
- where
- parseLink = notFollowedBy nop >> (uri <|> emailAddress)
- makeLink (text, url) = B.link url "" $ B.str text
- checkLink c
- | c == '/' = True
- | otherwise = isAlphaNum c
-
-str :: PandocMonad m => TWParser m B.Inlines
-str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
-
-nop :: PandocMonad m => TWParser m B.Inlines
-nop = try $ (skip exclamation <|> skip nopTag) >> followContent
- where
- exclamation = char '!'
- nopTag = stringAnyCase "<nop>"
- followContent = many1 nonspaceChar >>= return . B.str . fromEntities
-
-symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = count 1 nonspaceChar >>= return . B.str
-
-smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice [ apostrophe
- , dash
- , ellipses
- ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $
- many1Till inline singleQuoteEnd >>=
- (return . B.singleQuoted . B.trimInlines . mconcat)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
- doubleQuoteStart
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
- return (B.doubleQuoted $ B.trimInlines contents))
- <|> (return $ (B.str "\8220") B.<> contents)
-
-link :: PandocMonad m => TWParser m B.Inlines
-link = try $ do
- st <- getState
- guard $ stateAllowLinks st
- setState $ st{ stateAllowLinks = False }
- (url, title, content) <- linkText
- setState $ st{ stateAllowLinks = True }
- return $ B.link url title content
-
-linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
-linkText = do
- string "[["
- url <- many1Till anyChar (char ']')
- content <- option [B.str url] linkContent
- char ']'
- return (url, "", mconcat content)
- where
- linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
- parseLinkContent = parseFromString $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
deleted file mode 100644
index 6594b9ab8..000000000
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ /dev/null
@@ -1,729 +0,0 @@
-{-
-Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
- and John MacFarlane
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Textile
- Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : Paul Rivier <paul*rivier#demotera*com>
- Stability : alpha
- Portability : portable
-
-Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
-
-Implemented and parsed:
- - Paragraphs
- - Code blocks
- - Lists
- - blockquote
- - Inlines : strong, emph, cite, code, deleted, superscript,
- subscript, links
- - footnotes
- - HTML-specific and CSS-specific attributes on headers
-
-Left to be implemented:
- - dimension sign
- - all caps
- - continued blocks (ex bq..)
-
-TODO : refactor common patterns across readers :
- - more ...
-
--}
-
-
-module Text.Pandoc.Readers.Textile ( readTextile) where
-import Text.Pandoc.CSS
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing
-import Text.Pandoc.Logging
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
-import Text.Pandoc.Shared (trim)
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.HTML.TagSoup (fromAttrib, Tag(..))
-import Text.HTML.TagSoup.Match
-import Data.List ( intercalate, transpose, intersperse )
-import Data.Char ( digitToInt, isUpper )
-import Control.Monad ( guard, liftM )
-import Data.Monoid ((<>))
-import Text.Pandoc.Class (PandocMonad, report)
-import Control.Monad.Except (throwError)
-
--- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> m Pandoc
-readTextile opts s = do
- parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n")
- case parsed of
- Right result -> return result
- Left e -> throwError e
-
-
--- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
-parseTextile = do
- many blankline
- startPos <- getPosition
- -- go through once just to get list of reference keys and notes
- -- docMinusKeys is the raw document with blanks where the keys/notes were...
- let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . concat
- setPosition startPos
- st' <- getState
- let reversedNotes = stateNotes st'
- updateState $ \s -> s { stateNotes = reverse reversedNotes }
- -- now parse it for real...
- blocks <- parseBlocks
- return $ Pandoc nullMeta (B.toList blocks) -- FIXME
-
-noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
-noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
-
-noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
-noteBlock = try $ do
- startPos <- getPosition
- ref <- noteMarker
- optional blankline
- contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
- endPos <- getPosition
- let newnote = (ref, contents ++ "\n")
- st <- getState
- let oldnotes = stateNotes st
- updateState $ \s -> s { stateNotes = newnote : oldnotes }
- -- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-
--- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
-parseBlocks = mconcat <$> manyTill block eof
-
--- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
-blockParsers = [ codeBlock
- , header
- , blockQuote
- , hrule
- , commentBlock
- , anyList
- , rawHtmlBlock
- , rawLaTeXBlock'
- , table
- , maybeExplicitBlock "p" para
- , mempty <$ blanklines
- ]
-
--- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT [Char] ParserState m Blocks
-block = do
- res <- choice blockParsers <?> "block"
- pos <- getPosition
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
- return res
-
-commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
-commentBlock = try $ do
- string "###."
- manyTill anyLine blanklines
- return mempty
-
-codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
-codeBlock = codeBlockBc <|> codeBlockPre
-
-codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
-codeBlockBc = try $ do
- string "bc."
- extended <- option False (True <$ char '.')
- char ' '
- let starts = ["p", "table", "bq", "bc", "h1", "h2", "h3",
- "h4", "h5", "h6", "pre", "###", "notextile"]
- let ender = choice $ map explicitBlockStart starts
- contents <- if extended
- then do
- f <- anyLine
- rest <- many (notFollowedBy ender *> anyLine)
- return (f:rest)
- else manyTill anyLine blanklines
- return $ B.codeBlock (trimTrailingNewlines (unlines contents))
-
-trimTrailingNewlines :: String -> String
-trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
-
--- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
-codeBlockPre = try $ do
- (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
- result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
- optional blanklines
- -- drop leading newline if any
- let result'' = case result' of
- '\n':xs -> xs
- _ -> result'
- -- drop trailing newline if any
- let result''' = case reverse result'' of
- '\n':_ -> init result''
- _ -> result''
- let classes = words $ fromAttrib "class" t
- let ident = fromAttrib "id" t
- let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.codeBlockWith (ident,classes,kvs) result'''
-
--- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT [Char] ParserState m Blocks
-header = try $ do
- char 'h'
- level <- digitToInt <$> oneOf "123456"
- attr <- attributes
- char '.'
- lookAhead whitespace
- name <- trimInlines . mconcat <$> many inline
- attr' <- registerHeader attr name
- return $ B.headerWith attr' level name
-
--- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
-blockQuote = try $ do
- string "bq" >> attributes >> char '.' >> whitespace
- B.blockQuote <$> para
-
--- Horizontal rule
-
-hrule :: PandocMonad m => ParserT [Char] st m Blocks
-hrule = try $ do
- skipSpaces
- start <- oneOf "-*"
- count 2 (skipSpaces >> char start)
- skipMany (spaceChar <|> char start)
- newline
- optional blanklines
- return B.horizontalRule
-
--- Lists handling
-
--- | Can be a bullet list or an ordered list. This implementation is
--- strict in the nesting, sublist must start at exactly "parent depth
--- plus one"
-anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
-anyList = try $ anyListAtDepth 1 <* blanklines
-
--- | This allow one type of list to be nested into an other type,
--- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
-anyListAtDepth depth = choice [ bulletListAtDepth depth,
- orderedListAtDepth depth,
- definitionList ]
-
--- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
-bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-
--- | Bullet List Item of given depth, depth being the number of
--- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
-bulletListItemAtDepth = genericListItemAtDepth '*'
-
--- | Ordered List of given depth, depth being the number of
--- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
-orderedListAtDepth depth = try $ do
- items <- many1 (orderedListItemAtDepth depth)
- return $ B.orderedList items
-
--- | Ordered List Item of given depth, depth being the number of
--- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
-orderedListItemAtDepth = genericListItemAtDepth '#'
-
--- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
-genericListItemAtDepth c depth = try $ do
- count depth (char c) >> attributes >> whitespace
- p <- mconcat <$> many listInline
- newline
- sublist <- option mempty (anyListAtDepth (depth + 1))
- return $ (B.plain p) <> sublist
-
--- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
-definitionList = try $ B.definitionList <$> many1 definitionListItem
-
--- | List start character.
-listStart :: PandocMonad m => ParserT [Char] ParserState m ()
-listStart = genericListStart '*'
- <|> () <$ genericListStart '#'
- <|> () <$ definitionListStart
-
-genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
-genericListStart c = () <$ try (many1 (char c) >> whitespace)
-
-basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
-basicDLStart = do
- char '-'
- whitespace
- notFollowedBy newline
-
-definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
-definitionListStart = try $ do
- basicDLStart
- trimInlines . mconcat <$>
- many1Till inline
- ( try (newline *> lookAhead basicDLStart)
- <|> try (lookAhead (() <$ string ":="))
- )
-
-listInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
-listInline = try (notFollowedBy newline >> inline)
- <|> try (endline <* notFollowedBy listStart)
-
--- | A definition list item in textile begins with '- ', followed by
--- the term defined, then spaces and ":=". The definition follows, on
--- the same single line, or spaned on multiple line, after a line
--- break.
-definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
-definitionListItem = try $ do
- term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
- def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
- return (term, def')
- where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
- inlineDef = liftM (\d -> [B.plain d])
- $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
- multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
- multilineDef = try $ do
- optional whitespace >> newline
- s <- many1Till anyChar (try (string "=:" >> newline))
- -- this ++ "\n\n" does not look very good
- ds <- parseFromString parseBlocks (s ++ "\n\n")
- return [ds]
-
--- raw content
-
--- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
-rawHtmlBlock = try $ do
- skipMany spaceChar
- (_,b) <- htmlTag isBlockTag
- optional blanklines
- return $ B.rawBlock "html" b
-
--- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
-rawLaTeXBlock' = do
- guardEnabled Ext_raw_tex
- B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-
-
--- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT [Char] ParserState m Blocks
-para = B.para . trimInlines . mconcat <$> many1 inline
-
--- Tables
-
-toAlignment :: Char -> Alignment
-toAlignment '<' = AlignLeft
-toAlignment '>' = AlignRight
-toAlignment '=' = AlignCenter
-toAlignment _ = AlignDefault
-
-cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
-cellAttributes = try $ do
- isHeader <- option False (True <$ char '_')
- -- we just ignore colspan and rowspan markers:
- optional $ try $ oneOf "/\\" >> many1 digit
- -- we pay attention to alignments:
- alignment <- option AlignDefault $ toAlignment <$> oneOf "<>="
- -- ignore other attributes for now:
- _ <- attributes
- char '.'
- return (isHeader, alignment)
-
--- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
-tableCell = try $ do
- char '|'
- (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes
- notFollowedBy blankline
- raw <- trim <$>
- many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
- content <- mconcat <$> parseFromString (many inline) raw
- return ((isHeader, alignment), B.plain content)
-
--- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
-tableRow = try $ do
- -- skip optional row attributes
- optional $ try $ do
- _ <- attributes
- char '.'
- many1 spaceChar
- many1 tableCell <* char '|' <* blankline
-
--- | A table with an optional header.
-table :: PandocMonad m => ParserT [Char] ParserState m Blocks
-table = try $ do
- -- ignore table attributes
- caption <- option mempty $ try $ do
- string "table"
- _ <- attributes
- char '.'
- rawcapt <- trim <$> anyLine
- parseFromString (mconcat <$> many inline) rawcapt
- rawrows <- many1 $ (skipMany ignorableRow) >> tableRow
- skipMany ignorableRow
- blanklines
- let (headers, rows) = case rawrows of
- (toprow:rest) | any (fst . fst) toprow ->
- (toprow, rest)
- _ -> (mempty, rawrows)
- let nbOfCols = max (length headers) (length $ head rows)
- let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
- return $ B.table caption
- (zip aligns (replicate nbOfCols 0.0))
- (map snd headers)
- (map (map snd) rows)
-
--- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
-ignorableRow = try $ do
- char '|'
- oneOf ":^-~"
- _ <- attributes
- char '.'
- _ <- anyLine
- return ()
-
-explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
-explicitBlockStart name = try $ do
- string name
- attributes
- char '.'
- optional whitespace
- optional endline
-
--- | Blocks like 'p' and 'table' do not need explicit block tag.
--- However, they can be used to set HTML/CSS attributes when needed.
-maybeExplicitBlock :: PandocMonad m
- => String -- ^ block tag name
- -> ParserT [Char] ParserState m Blocks -- ^ implicit block
- -> ParserT [Char] ParserState m Blocks
-maybeExplicitBlock name blk = try $ do
- optional $ explicitBlockStart name
- blk
-
-
-
-----------
--- Inlines
-----------
-
-
--- | Any inline element
-inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
-inline = do
- choice inlineParsers <?> "inline"
-
--- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
-inlineParsers = [ str
- , whitespace
- , endline
- , code
- , escapedInline
- , inlineMarkup
- , groupedInlineMarkup
- , rawHtmlInline
- , rawLaTeXInline'
- , note
- , link
- , image
- , mark
- , (B.str . (:[])) <$> characterReference
- , smartPunctuation inline
- , symbol
- ]
-
--- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
-inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
- , simpleInline (string "**") B.strong
- , simpleInline (string "__") B.emph
- , simpleInline (char '*') B.strong
- , simpleInline (char '_') B.emph
- , simpleInline (char '+') B.emph -- approximates underline
- , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
- , simpleInline (char '^') B.superscript
- , simpleInline (char '~') B.subscript
- , simpleInline (char '%') id
- ]
-
--- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT [Char] st m Inlines
-mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-
-reg :: PandocMonad m => ParserT [Char] st m Inlines
-reg = do
- oneOf "Rr"
- char ')'
- return $ B.str "\174"
-
-tm :: PandocMonad m => ParserT [Char] st m Inlines
-tm = do
- oneOf "Tt"
- oneOf "Mm"
- char ')'
- return $ B.str "\8482"
-
-copy :: PandocMonad m => ParserT [Char] st m Inlines
-copy = do
- oneOf "Cc"
- char ')'
- return $ B.str "\169"
-
-note :: PandocMonad m => ParserT [Char] ParserState m Inlines
-note = try $ do
- ref <- (char '[' *> many1 digit <* char ']')
- notes <- stateNotes <$> getState
- case lookup ref notes of
- Nothing -> fail "note not found"
- Just raw -> B.note <$> parseFromString parseBlocks raw
-
--- | Special chars
-markupChars :: [Char]
-markupChars = "\\*#_@~-+^|%=[]&"
-
--- | Break strings on following chars. Space tab and newline break for
--- inlines breaking. Open paren breaks for mark. Quote, dash and dot
--- break for smart punctuation. Punctuation breaks for regular
--- punctuation. Double quote breaks for named links. > and < break
--- for inline html.
-stringBreakers :: [Char]
-stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
-
-wordBoundaries :: [Char]
-wordBoundaries = markupChars ++ stringBreakers
-
--- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
-hyphenedWords = do
- x <- wordChunk
- xs <- many (try $ char '-' >> wordChunk)
- return $ intercalate "-" (x:xs)
-
-wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
-wordChunk = try $ do
- hd <- noneOf wordBoundaries
- tl <- many ( (noneOf wordBoundaries) <|>
- try (notFollowedBy' note *> oneOf markupChars
- <* lookAhead (noneOf wordBoundaries) ) )
- return $ hd:tl
-
--- | Any string
-str :: PandocMonad m => ParserT [Char] ParserState m Inlines
-str = do
- baseStr <- hyphenedWords
- -- RedCloth compliance : if parsed word is uppercase and immediatly
- -- followed by parens, parens content is unconditionally word acronym
- fullStr <- option baseStr $ try $ do
- guard $ all isUpper baseStr
- acro <- enclosed (char '(') (char ')') anyChar'
- return $ concat [baseStr, " (", acro, ")"]
- updateLastStrPos
- return $ B.str fullStr
-
--- | Some number of space chars
-whitespace :: PandocMonad m => ParserT [Char] st m Inlines
-whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-
--- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
-endline = try $ do
- newline
- notFollowedBy blankline
- notFollowedBy listStart
- notFollowedBy rawHtmlBlock
- return B.linebreak
-
-rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
-rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-
--- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
-rawLaTeXInline' = try $ do
- guardEnabled Ext_raw_tex
- B.singleton <$> rawLaTeXInline
-
--- | Textile standard link syntax is "label":target. But we
--- can also have ["label":target].
-link :: PandocMonad m => ParserT [Char] ParserState m Inlines
-link = try $ do
- bracketed <- (True <$ char '[') <|> return False
- char '"' *> notFollowedBy (oneOf " \t\n\r")
- attr <- attributes
- name <- trimInlines . mconcat <$>
- withQuoteContext InDoubleQuote (many1Till inline (char '"'))
- char ':'
- let stop = if bracketed
- then char ']'
- else lookAhead $ space <|>
- try (oneOf "!.,;:" *> (space <|> newline))
- url <- many1Till nonspaceChar stop
- let name' = if B.toList name == [Str "$"] then B.str url else name
- return $ if attr == nullAttr
- then B.link url "" name'
- else B.spanWith attr $ B.link url "" name'
-
--- | image embedding
-image :: PandocMonad m => ParserT [Char] ParserState m Inlines
-image = try $ do
- char '!' >> notFollowedBy space
- (ident, cls, kvs) <- attributes
- let attr = case lookup "style" kvs of
- Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
- Nothing -> (ident, cls, kvs)
- src <- many1 (noneOf " \t\n\r!(")
- alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')')
- char '!'
- return $ B.imageWith attr src alt (B.str alt)
-
-escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedInline = escapedEqs <|> escapedTag
-
-escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedEqs = B.str <$>
- (try $ string "==" *> manyTill anyChar' (try $ string "=="))
-
--- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedTag = B.str <$>
- (try $ string "<notextile>" *>
- manyTill anyChar' (try $ string "</notextile>"))
-
--- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
-symbol = B.str . singleton <$> (notFollowedBy newline *>
- notFollowedBy rawHtmlBlock *>
- oneOf wordBoundaries)
-
--- | Inline code
-code :: PandocMonad m => ParserT [Char] ParserState m Inlines
-code = code1 <|> code2
-
--- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
-anyChar' =
- satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
-
-code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
-code1 = B.code <$> surrounded (char '@') anyChar'
-
-code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
-code2 = do
- htmlTag (tagOpen (=="tt") null)
- B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-
--- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
-attributes = (foldl (flip ($)) ("",[],[])) <$>
- try (do special <- option id specialAttribute
- attrs <- many attribute
- return (special : attrs))
-
-specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
-specialAttribute = do
- alignStr <- ("center" <$ char '=') <|>
- ("justify" <$ try (string "<>")) <|>
- ("right" <$ char '>') <|>
- ("left" <$ char '<')
- notFollowedBy spaceChar
- return $ addStyle ("text-align:" ++ alignStr)
-
-attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
-attribute = try $
- (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-
-classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
-classIdAttr = try $ do -- (class class #id)
- char '('
- ws <- words `fmap` manyTill anyChar' (char ')')
- case reverse ws of
- [] -> return $ \(_,_,keyvals) -> ("",[],keyvals)
- (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
- (ident',classes',keyvals)
- classes' -> return $ \(_,_,keyvals) ->
- ("",classes',keyvals)
-
-styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
-styleAttr = do
- style <- try $ enclosed (char '{') (char '}') anyChar'
- return $ addStyle style
-
-addStyle :: String -> Attr -> Attr
-addStyle style (id',classes,keyvals) =
- (id',classes,keyvals')
- where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
- style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
-
-langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
-langAttr = do
- lang <- try $ enclosed (char '[') (char ']') alphaNum
- return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
-
--- | Parses material surrounded by a parser.
-surrounded :: PandocMonad m
- => ParserT [Char] st m t -- ^ surrounding parser
- -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
- -> ParserT [Char] st m [a]
-surrounded border =
- enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
-
-simpleInline :: PandocMonad m
- => ParserT [Char] ParserState m t -- ^ surrounding parser
- -> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
-simpleInline border construct = try $ do
- notAfterString
- border *> notFollowedBy (oneOf " \t\n\r")
- attr <- attributes
- body <- trimInlines . mconcat <$>
- withQuoteContext InSingleQuote
- (manyTill (notFollowedBy newline >> inline)
- (try border <* notFollowedBy alphaNum))
- return $ construct $
- if attr == nullAttr
- then body
- else B.spanWith attr body
-
-groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
-groupedInlineMarkup = try $ do
- char '['
- sp1 <- option mempty $ B.space <$ whitespace
- result <- withQuoteContext InSingleQuote inlineMarkup
- sp2 <- option mempty $ B.space <$ whitespace
- char ']'
- return $ sp1 <> result <> sp2
-
--- | Create a singleton list
-singleton :: a -> [a]
-singleton x = [x]
-
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
deleted file mode 100644
index 9e2b6963d..000000000
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ /dev/null
@@ -1,596 +0,0 @@
-{-# LANGUAGE ViewPatterns #-}
-{-
-Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Txt2Tags
- Copyright : Copyright (C) 2014 Matthew Pickering
- License : GNU GPL, version 2 or above
-
- Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
-
-Conversion of txt2tags formatted plain text to 'Pandoc' document.
--}
-module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
- , getT2TMeta
- , T2TMeta (..)
- )
- where
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
-import Data.Monoid ((<>))
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared (escapeURI,compactify, compactifyDL)
-import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
-import Data.Char (toLower)
-import Data.List (transpose, intersperse, intercalate)
-import Data.Maybe (fromMaybe)
---import Network.URI (isURI) -- Not sure whether to use this function
-import Control.Monad (void, guard, when)
-import Data.Default
-import Control.Monad.Reader (Reader, runReader, asks)
-
-import Data.Time.Format (formatTime)
-import Text.Pandoc.Compat.Time (defaultTimeLocale)
-import Control.Monad.Except (throwError, catchError)
-import Text.Pandoc.Class (PandocMonad)
-import qualified Text.Pandoc.Class as P
-
-type T2T = ParserT String ParserState (Reader T2TMeta)
-
--- | An object for the T2T macros meta information
--- the contents of each field is simply substituted verbatim into the file
-data T2TMeta = T2TMeta {
- date :: String -- ^ Current date
- , mtime :: String -- ^ Last modification time of infile
- , infile :: FilePath -- ^ Input file
- , outfile :: FilePath -- ^ Output file
- } deriving Show
-
-instance Default T2TMeta where
- def = T2TMeta "" "" "" ""
-
--- | Get the meta information required by Txt2Tags macros
-getT2TMeta :: PandocMonad m => m T2TMeta
-getT2TMeta = do
- mbInps <- P.getInputFiles
- let inps = case mbInps of
- Just x -> x
- Nothing -> []
- mbOutp <- P.getOutputFile
- let outp = case mbOutp of
- Just x -> x
- Nothing -> ""
- curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
- let getModTime = fmap (formatTime defaultTimeLocale "%T") .
- P.getModificationTime
- curMtime <- case inps of
- [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
- _ -> catchError
- (maximum <$> mapM getModTime inps)
- (const (return ""))
- return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
-
--- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
- => ReaderOptions
- -> String
- -> m Pandoc
-readTxt2Tags opts s = do
- meta <- getT2TMeta
- let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
- case parsed of
- Right result -> return $ result
- Left e -> throwError e
-
--- | Read Txt2Tags (ignoring all macros) from an input string returning
--- a Pandoc document
--- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
--- readTxt2TagsNoMacros = readTxt2Tags
-
-parseT2T :: T2T Pandoc
-parseT2T = do
- -- Parse header if standalone flag is set
- standalone <- getOption readerStandalone
- when standalone parseHeader
- body <- mconcat <$> manyTill block eof
- meta' <- stateMeta <$> getState
- return $ Pandoc meta' (B.toList body)
-
-parseHeader :: T2T ()
-parseHeader = do
- () <$ try blankline <|> header
- meta <- stateMeta <$> getState
- optional blanklines
- config <- manyTill setting (notFollowedBy setting)
- -- TODO: Handle settings better
- let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) meta config
- updateState (\s -> s {stateMeta = settings}) <* optional blanklines
-
-header :: T2T ()
-header = titleline >> authorline >> dateline
-
-headerline :: B.ToMetaValue a => String -> T2T a -> T2T ()
-headerline field p = (() <$ try blankline)
- <|> (p >>= updateState . B.setMeta field)
-
-titleline :: T2T ()
-titleline =
- headerline "title" (trimInlines . mconcat <$> manyTill inline newline)
-
-authorline :: T2T ()
-authorline =
- headerline "author" (sepBy author (char ';') <* newline)
- where
- author = trimInlines . mconcat <$> many (notFollowedBy (char ';' <|> newline) >> inline)
-
-dateline :: T2T ()
-dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline)
-
-type Keyword = String
-type Value = String
-
-setting :: T2T (Keyword, Value)
-setting = do
- string "%!"
- keyword <- ignoreSpacesCap (many1 alphaNum)
- char ':'
- value <- ignoreSpacesCap (manyTill anyChar (newline))
- return (keyword, value)
-
--- Blocks
-
-parseBlocks :: T2T Blocks
-parseBlocks = mconcat <$> manyTill block eof
-
-block :: T2T Blocks
-block = do
- choice
- [ mempty <$ blanklines
- , quote
- , hrule -- hrule must go above title
- , title
- , commentBlock
- , verbatim
- , rawBlock
- , taggedBlock
- , list
- , table
- , para
- ]
-
-title :: T2T Blocks
-title = try $ balancedTitle '+' <|> balancedTitle '='
-
-balancedTitle :: Char -> T2T Blocks
-balancedTitle c = try $ do
- spaces
- level <- length <$> many1 (char c)
- guard (level <= 5) -- Max header level 5
- heading <- manyTill (noneOf "\n\r") (count level (char c))
- label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
- many spaceChar *> newline
- let attr = maybe nullAttr (\x -> (x, [], [])) label
- return $ B.headerWith attr level (trimInlines $ B.text heading)
-
-para :: T2T Blocks
-para = try $ do
- ils <- parseInlines
- nl <- option False (True <$ newline)
- option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
- where
- listStart = try bulletListStart <|> orderedListStart
-
-commentBlock :: T2T Blocks
-commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
-
--- Seperator and Strong line treated the same
-hrule :: T2T Blocks
-hrule = try $ do
- spaces
- line <- many1 (oneOf "=-_")
- guard (length line >= 20)
- B.horizontalRule <$ blankline
-
-quote :: T2T Blocks
-quote = try $ do
- lookAhead tab
- rawQuote <- many1 (tab *> optional spaces *> anyLine)
- contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
- return $ B.blockQuote contents
-
-commentLine :: T2T Inlines
-commentLine = comment
-
--- List Parsing code from Org Reader
-
-list :: T2T Blocks
-list = choice [bulletList, orderedList, definitionList]
-
-bulletList :: T2T Blocks
-bulletList = B.bulletList . compactify
- <$> many1 (listItem bulletListStart parseBlocks)
-
-orderedList :: T2T Blocks
-orderedList = B.orderedList . compactify
- <$> many1 (listItem orderedListStart parseBlocks)
-
-definitionList :: T2T Blocks
-definitionList = try $ do
- B.definitionList . compactifyDL <$>
- many1 (listItem definitionListStart definitionListEnd)
-
-definitionListEnd :: T2T (Inlines, [Blocks])
-definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
-
-genericListStart :: T2T Char
- -> T2T Int
-genericListStart listMarker = try $
- (2+) <$> (length <$> many spaceChar
- <* listMarker <* space <* notFollowedBy space)
-
--- parses bullet list \start and returns its length (excl. following whitespace)
-bulletListStart :: T2T Int
-bulletListStart = genericListStart (char '-')
-
-orderedListStart :: T2T Int
-orderedListStart = genericListStart (char '+' )
-
-definitionListStart :: T2T Int
-definitionListStart = genericListStart (char ':')
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: T2T Int
- -> T2T a
- -> T2T a
-listItem start end = try $ do
- markerLength <- try start
- firstLine <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString end $ firstLine ++ blank ++ rest
-
--- continuation of a list item - indented and separated by blankline or endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Int
- -> T2T String
-listContinuation markerLength = try $
- notFollowedBy' (blankline >> blankline)
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
- where listLine = try $ indentWith markerLength *> anyLineNewline
-
-anyLineNewline :: T2T String
-anyLineNewline = (++ "\n") <$> anyLine
-
-indentWith :: Int -> T2T String
-indentWith n = count n space
-
--- Table
-
-table :: T2T Blocks
-table = try $ do
- tableHeader <- fmap snd <$> option mempty (try headerRow)
- rows <- many1 (many commentLine *> tableRow)
- let columns = transpose rows
- let ncolumns = length columns
- let aligns = map (foldr1 findAlign) (map (map fst) columns)
- let rows' = map (map snd) rows
- let size = maximum (map length rows')
- let rowsPadded = map (pad size) rows'
- let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty
- return $ B.table mempty
- (zip aligns (replicate ncolumns 0.0))
- headerPadded rowsPadded
-
-pad :: (Monoid a) => Int -> [a] -> [a]
-pad n xs = xs ++ (replicate (n - length xs) mempty)
-
-
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
- | x == y = x
- | otherwise = AlignDefault
-
-headerRow :: T2T [(Alignment, Blocks)]
-headerRow = genericRow (string "||")
-
-tableRow :: T2T [(Alignment, Blocks)]
-tableRow = genericRow (char '|')
-
-genericRow :: T2T a -> T2T [(Alignment, Blocks)]
-genericRow start = try $ do
- spaces *> start
- manyTill tableCell newline <?> "genericRow"
-
-
-tableCell :: T2T (Alignment, Blocks)
-tableCell = try $ do
- leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
- content <- (manyTill inline (try $ lookAhead (cellEnd)))
- rightSpaces <- length <$> many space
- let align =
- case compare leftSpaces rightSpaces of
- LT -> AlignLeft
- EQ -> AlignCenter
- GT -> AlignRight
- endOfCell
- return $ (align, B.plain (B.trimInlines $ mconcat content))
- where
- cellEnd = (void newline <|> (many1 space *> endOfCell))
-
-endOfCell :: T2T ()
-endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
-
--- Raw area
-
-verbatim :: T2T Blocks
-verbatim = genericBlock anyLineNewline B.codeBlock "```"
-
-rawBlock :: T2T Blocks
-rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
-
-taggedBlock :: T2T Blocks
-taggedBlock = do
- target <- getTarget
- genericBlock anyLineNewline (B.rawBlock target) "'''"
-
--- Generic
-
-genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
-genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
-
-blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
-blockMarkupArea p f s = try $ (do
- string s *> blankline
- f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
-
-blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
-blockMarkupLine p f s = try (f <$> (string s *> space *> p))
-
--- Can be in either block or inline position
-comment :: Monoid a => T2T a
-comment = try $ do
- atStart
- notFollowedBy macro
- mempty <$ (char '%' *> anyLine)
-
--- Inline
-
-parseInlines :: T2T Inlines
-parseInlines = trimInlines . mconcat <$> many1 inline
-
-inline :: T2T Inlines
-inline = do
- choice
- [ endline
- , macro
- , commentLine
- , whitespace
- , url
- , link
- , image
- , bold
- , underline
- , code
- , raw
- , tagged
- , strike
- , italic
- , code
- , str
- , symbol
- ]
-
-bold :: T2T Inlines
-bold = inlineMarkup inline B.strong '*' (B.str)
-
-underline :: T2T Inlines
-underline = inlineMarkup inline B.emph '_' (B.str)
-
-strike :: T2T Inlines
-strike = inlineMarkup inline B.strikeout '-' (B.str)
-
-italic :: T2T Inlines
-italic = inlineMarkup inline B.emph '/' (B.str)
-
-code :: T2T Inlines
-code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
-
-raw :: T2T Inlines
-raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
-
-tagged :: T2T Inlines
-tagged = do
- target <- getTarget
- inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
-
--- Parser for markup indicated by a double character.
--- Inline markup is greedy and glued
--- Greedy meaning ***a*** = Bold [Str "*a*"]
--- Glued meaning that markup must be tight to content
--- Markup can't pass newlines
-inlineMarkup :: Monoid a
- => (T2T a) -- Content parser
- -> (a -> Inlines) -- Constructor
- -> Char -- Fence
- -> (String -> a) -- Special Case to handle ******
- -> T2T Inlines
-inlineMarkup p f c special = try $ do
- start <- many1 (char c)
- let l = length start
- guard (l >= 2)
- when (l == 2) (void $ notFollowedBy space)
- -- We must make sure that there is no space before the start of the
- -- closing tags
- body <- optionMaybe (try $ manyTill (noneOf "\n\r") $
- (try $ lookAhead (noneOf " " >> string [c,c] )))
- case body of
- Just middle -> do
- lastChar <- anyChar
- end <- many1 (char c)
- let parser inp = parseFromString (mconcat <$> many p) inp
- let start' = case drop 2 start of
- "" -> mempty
- xs -> special xs
- body' <- parser (middle ++ [lastChar])
- let end' = case drop 2 end of
- "" -> mempty
- xs -> special xs
- return $ f (start' <> body' <> end')
- Nothing -> do -- Either bad or case such as *****
- guard (l >= 5)
- let body' = (replicate (l - 4) c)
- return $ f (special body')
-
-link :: T2T Inlines
-link = try imageLink <|> titleLink
-
--- Link with title
-titleLink :: T2T Inlines
-titleLink = try $ do
- char '['
- notFollowedBy space
- tokens <- sepBy1 (many $ noneOf " ]") space
- guard (length tokens >= 2)
- char ']'
- let link' = last tokens
- guard (length link' > 0)
- let tit = concat (intersperse " " (init tokens))
- return $ B.link link' "" (B.text tit)
-
--- Link with image
-imageLink :: T2T Inlines
-imageLink = try $ do
- char '['
- body <- image
- many1 space
- l <- manyTill (noneOf "\n\r ") (char ']')
- return (B.link l "" body)
-
-macro :: T2T Inlines
-macro = try $ do
- name <- string "%%" *> oneOfStringsCI (map fst commands)
- optional (try $ enclosed (char '(') (char ')') anyChar)
- lookAhead (spaceChar <|> oneOf specialChars <|> newline)
- maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
- where
- commands = [ ("date", date), ("mtime", mtime)
- , ("infile", infile), ("outfile", outfile)]
-
--- raw URLs in text are automatically linked
-url :: T2T Inlines
-url = try $ do
- (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
- return $ B.link rawUrl "" (B.str escapedUrl)
-
-uri :: T2T (String, String)
-uri = try $ do
- address <- t2tURI
- return (address, escapeURI address)
-
--- The definition of a URI in the T2T source differs from the
--- actual definition. This is a transcription of the definition in
--- the source of v2.6
---isT2TURI :: String -> Bool
---isT2TURI (parse t2tURI "" -> Right _) = True
---isT2TURI _ = False
-
-t2tURI :: T2T String
-t2tURI = do
- start <- try ((++) <$> proto <*> urlLogin) <|> guess
- domain <- many1 chars
- sep <- many (char '/')
- form' <- option mempty ((:) <$> char '?' <*> many1 form)
- anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
- return (start ++ domain ++ sep ++ form' ++ anchor')
- where
- protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
- proto = (++) <$> oneOfStrings protos <*> string "://"
- guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
- <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
- login = alphaNum <|> oneOf "_.-"
- pass = many (noneOf " @")
- chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
- anchor = alphaNum <|> oneOf "%._0"
- form = chars <|> oneOf ";*"
- urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
-
-
-image :: T2T Inlines
-image = try $ do
- -- List taken from txt2tags source
- let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
- char '['
- path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
- ext <- oneOfStrings extensions
- char ']'
- return $ B.image (path ++ ext) "" mempty
-
--- Characters used in markup
-specialChars :: String
-specialChars = "%*-_/|:+;"
-
-tab :: T2T Char
-tab = char '\t'
-
-space :: T2T Char
-space = char ' '
-
-spaces :: T2T String
-spaces = many space
-
-endline :: T2T Inlines
-endline = try $ do
- newline
- notFollowedBy blankline
- notFollowedBy hrule
- notFollowedBy title
- notFollowedBy verbatim
- notFollowedBy rawBlock
- notFollowedBy taggedBlock
- notFollowedBy quote
- notFollowedBy list
- notFollowedBy table
- return $ B.softbreak
-
-str :: T2T Inlines
-str = try $ do
- B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
-
-whitespace :: T2T Inlines
-whitespace = try $ B.space <$ spaceChar
-
-symbol :: T2T Inlines
-symbol = B.str . (:[]) <$> oneOf specialChars
-
--- Utility
-
-getTarget :: T2T String
-getTarget = do
- mv <- lookupMeta "target" . stateMeta <$> getState
- let MetaString target = fromMaybe (MetaString "html") mv
- return target
-
-atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
-
-ignoreSpacesCap :: T2T String -> T2T String
-ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
deleted file mode 100644
index 85b298a85..000000000
--- a/src/Text/Pandoc/SelfContained.hs
+++ /dev/null
@@ -1,181 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.SelfContained
- Copyright : Copyright (C) 2011-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for converting an HTML file into one that can be viewed
-offline, by incorporating linked images, CSS, and scripts into
-the HTML using data URIs.
--}
-module Text.Pandoc.SelfContained ( makeSelfContained ) where
-import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString, URI(..), parseURI)
-import Data.ByteString.Base64
-import qualified Data.ByteString.Char8 as B
-import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, takeDirectory, (</>))
-import Data.Char (toLower, isAscii, isAlphaNum)
-import Codec.Compression.GZip as Gzip
-import qualified Data.ByteString.Lazy as L
-import Control.Monad.Trans (MonadIO(..))
-import Text.Pandoc.Shared (renderTags', err, warn, trim)
-import Text.Pandoc.MediaBag (MediaBag)
-import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.UTF8 (toString)
-import Text.Pandoc.Options (WriterOptions(..))
-import Data.List (isPrefixOf)
-import Control.Applicative ((<|>))
-import Text.Parsec (runParserT, ParsecT)
-import qualified Text.Parsec as P
-import Control.Monad.Trans (lift)
-import Text.Pandoc.Class (fetchItem, runIO, setMediaBag)
-
-isOk :: Char -> Bool
-isOk c = isAscii c && isAlphaNum c
-
-makeDataURI :: String -> ByteString -> String
-makeDataURI mime raw =
- if textual
- then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
- else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
- where textual = "text/" `Data.List.isPrefixOf` mime
- mime' = if textual && ';' `notElem` mime
- then mime ++ ";charset=utf-8"
- else mime -- mime type already has charset
-
-convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
-convertTag media sourceURL t@(TagOpen tagname as)
- | tagname `elem`
- ["img", "embed", "video", "input", "audio", "source", "track"] = do
- as' <- mapM processAttribute as
- return $ TagOpen tagname as'
- where processAttribute (x,y) =
- if x == "src" || x == "href" || x == "poster"
- then do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) y
- return (x, enc)
- else return (x,y)
-convertTag media sourceURL t@(TagOpen "script" as) =
- case fromAttrib "src" t of
- [] -> return t
- src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media sourceURL t@(TagOpen "link" as) =
- case fromAttrib "href" t of
- [] -> return t
- src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ _ t = return t
-
-cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
- -> IO ByteString
-cssURLs media sourceURL d orig = do
- res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
- case res of
- Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
- Right bs -> return bs
-
-parseCSSUrls :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-parseCSSUrls media sourceURL d = B.concat <$> P.many
- (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
-
--- Note: some whitespace in CSS is significant, so we can't collapse it!
-pCSSWhite :: ParsecT ByteString () IO ByteString
-pCSSWhite = B.singleton <$> P.space <* P.spaces
-
-pCSSComment :: ParsecT ByteString () IO ByteString
-pCSSComment = P.try $ do
- P.string "/*"
- P.manyTill P.anyChar (P.try (P.string "*/"))
- return B.empty
-
-pCSSOther :: ParsecT ByteString () IO ByteString
-pCSSOther = do
- (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
- (B.singleton <$> P.char 'u') <|>
- (B.singleton <$> P.char '/')
-
-pCSSUrl :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-pCSSUrl media sourceURL d = P.try $ do
- P.string "url("
- P.spaces
- quote <- P.option Nothing (Just <$> P.oneOf "\"'")
- url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote)
- P.spaces
- P.char ')'
- let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
- maybe "" (:[]) quote ++ ")")
- case trim url of
- '#':_ -> return fallback
- 'd':'a':'t':'a':':':_ -> return fallback
- u -> do let url' = if isURI u then u else d </> u
- enc <- lift $ getDataURI media sourceURL "" url'
- return (B.pack $ "url(" ++ enc ++ ")")
-
-
-getDataURI :: MediaBag -> Maybe String -> MimeType -> String
- -> IO String
-getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri
-getDataURI media sourceURL mimetype src = do
- let ext = map toLower $ takeExtension src
- fetchResult <- runIO $ do setMediaBag media
- fetchItem sourceURL src
- (raw, respMime) <- case fetchResult of
- Left msg -> err 67 $ "Could not fetch " ++ src ++
- "\n" ++ show msg
- Right x -> return x
- let raw' = if ext == ".gz"
- then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
- $ [raw]
- else raw
- let mime = case (mimetype, respMime) of
- ("",Nothing) -> error
- $ "Could not determine mime type for `" ++ src ++ "'"
- (x, Nothing) -> x
- (_, Just x ) -> x
- let cssSourceURL = case parseURI src of
- Just u
- | uriScheme u `elem` ["http:","https:"] ->
- Just $ show u{ uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- _ -> Nothing
- result <- if mime == "text/css"
- then cssURLs media cssSourceURL (takeDirectory src) raw'
- else return raw'
- return $ makeDataURI mime result
-
--- | Convert HTML into self-contained HTML, incorporating images,
--- scripts, and CSS using data: URIs.
-makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String
-makeSelfContained opts mediabag inp = liftIO $ do
- let tags = parseTags inp
- out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags
- return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
deleted file mode 100644
index 268a5052e..000000000
--- a/src/Text/Pandoc/Shared.hs
+++ /dev/null
@@ -1,883 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables, PatternGuards,
- ViewPatterns #-}
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Shared
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Utility functions and definitions used by the various Pandoc modules.
--}
-module Text.Pandoc.Shared (
- -- * List processing
- splitBy,
- splitByIndices,
- splitStringByIndices,
- substitute,
- ordNub,
- -- * Text processing
- backslashEscapes,
- escapeStringUsing,
- stripTrailingNewlines,
- trim,
- triml,
- trimr,
- stripFirstAndLast,
- camelCaseToHyphenated,
- toRomanNumeral,
- escapeURI,
- tabFilter,
- -- * Date/time
- normalizeDate,
- -- * Pandoc block and inline list processing
- orderedListMarkers,
- normalizeSpaces,
- extractSpaces,
- removeFormatting,
- deNote,
- stringify,
- capitalize,
- compactify,
- compactifyDL,
- linesToPara,
- Element (..),
- hierarchicalize,
- uniqueIdent,
- inlineListToIdentifier,
- isHeaderBlock,
- headerShift,
- isTightList,
- addMetaField,
- makeMeta,
- -- * TagSoup HTML handling
- renderTags',
- -- * File handling
- inDirectory,
- getDefaultReferenceDocx,
- getDefaultReferenceODT,
- readDataFile,
- readDataFileUTF8,
- openURL,
- collapseFilePath,
- filteredFilesFromArchive,
- -- * Error handling
- err,
- warn,
- mapLeft,
- -- * for squashing blocks
- blocksToInlines,
- -- * Safe read
- safeRead,
- -- * Temp directory
- withTempDir,
- -- * Version
- pandocVersion
- ) where
-
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
-import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import System.Exit (exitWith, ExitCode(..))
-import Data.Char ( toLower, isLower, isUpper, isAlpha,
- isLetter, isDigit, isSpace )
-import Data.List ( find, stripPrefix, intercalate )
-import Data.Maybe (mapMaybe)
-import Data.Version ( showVersion )
-import qualified Data.Map as M
-import Network.URI ( escapeURIString, unEscapeString )
-import qualified Data.Set as Set
-import System.Directory
-import System.FilePath (splitDirectories, isPathSeparator)
-import qualified System.FilePath.Posix as Posix
-import Text.Pandoc.MIME (MimeType)
-import System.FilePath ( (</>) )
-import Data.Generics (Typeable, Data)
-import qualified Control.Monad.State as S
-import Control.Monad.Trans (MonadIO (..))
-import qualified Control.Exception as E
-import Control.Monad (msum, unless, MonadPlus(..))
-import Text.Pandoc.Pretty (charWidth)
-import Text.Pandoc.Compat.Time
-import Data.Time.Clock.POSIX
-import System.IO (stderr)
-import System.IO.Temp
-import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
- renderOptions)
-import Data.Monoid ((<>))
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as B8
-import Data.ByteString.Base64 (decodeLenient)
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-import qualified Data.Text as T (toUpper, pack, unpack)
-import Data.ByteString.Lazy (toChunks, fromChunks)
-import qualified Data.ByteString.Lazy as BL
-import Paths_pandoc (version)
-
-import Codec.Archive.Zip
-
-#ifdef EMBED_DATA_FILES
-import Text.Pandoc.Data (dataFiles)
-#else
-import Paths_pandoc (getDataFileName)
-#endif
-#ifdef HTTP_CLIENT
-import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
- Request(port,host,requestHeaders))
-import Network.HTTP.Client (parseRequest)
-import Network.HTTP.Client (newManager)
-import Network.HTTP.Client.Internal (addProxy)
-import Network.HTTP.Client.TLS (tlsManagerSettings)
-import System.Environment (getEnv)
-import Network.HTTP.Types.Header ( hContentType, hUserAgent)
-import Network (withSocketsDo)
-#else
-import Network.URI (parseURI)
-import Network.HTTP (findHeader, rspBody,
- RequestMethod(..), HeaderName(..), mkRequest)
-import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
-#endif
-
--- | Version number of pandoc library.
-pandocVersion :: String
-pandocVersion = showVersion version
-
---
--- List processing
---
-
--- | Split list by groups of one or more sep.
-splitBy :: (a -> Bool) -> [a] -> [[a]]
-splitBy _ [] = []
-splitBy isSep lst =
- let (first, rest) = break isSep lst
- rest' = dropWhile isSep rest
- in first:(splitBy isSep rest')
-
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
- where (first, rest) = splitAt x lst
-
--- | Split string into chunks divided at specified indices.
-splitStringByIndices :: [Int] -> [Char] -> [[Char]]
-splitStringByIndices [] lst = [lst]
-splitStringByIndices (x:xs) lst =
- let (first, rest) = splitAt' x lst in
- first : (splitStringByIndices (map (\y -> y - x) xs) rest)
-
-splitAt' :: Int -> [Char] -> ([Char],[Char])
-splitAt' _ [] = ([],[])
-splitAt' n xs | n <= 0 = ([],xs)
-splitAt' n (x:xs) = (x:ys,zs)
- where (ys,zs) = splitAt' (n - charWidth x) xs
-
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ xs = xs
-substitute target replacement lst@(x:xs) =
- case stripPrefix target lst of
- Just lst' -> replacement ++ substitute target replacement lst'
- Nothing -> x : substitute target replacement xs
-
-ordNub :: (Ord a) => [a] -> [a]
-ordNub l = go Set.empty l
- where
- go _ [] = []
- go s (x:xs) = if x `Set.member` s then go s xs
- else x : go (Set.insert x s) xs
-
---
--- Text processing
---
-
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char] -- ^ list of special characters to escape
- -> [(Char, String)]
-backslashEscapes = map (\ch -> (ch, ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, String)] -> String -> String
-escapeStringUsing _ [] = ""
-escapeStringUsing escapeTable (x:xs) =
- case (lookup x escapeTable) of
- Just str -> str ++ rest
- Nothing -> x:rest
- where rest = escapeStringUsing escapeTable xs
-
--- | Strip trailing newlines from string.
-stripTrailingNewlines :: String -> String
-stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
-
--- | Remove leading and trailing space (including newlines) from string.
-trim :: String -> String
-trim = triml . trimr
-
--- | Remove leading space (including newlines) from string.
-triml :: String -> String
-triml = dropWhile (`elem` " \r\n\t")
-
--- | Remove trailing space (including newlines) from string.
-trimr :: String -> String
-trimr = reverse . triml . reverse
-
--- | Strip leading and trailing characters from string
-stripFirstAndLast :: String -> String
-stripFirstAndLast str =
- drop 1 $ take ((length str) - 1) str
-
--- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
-camelCaseToHyphenated :: String -> String
-camelCaseToHyphenated [] = ""
-camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
- a:'-':(toLower b):(camelCaseToHyphenated rest)
-camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
-
--- | Convert number < 4000 to uppercase roman numeral.
-toRomanNumeral :: Int -> String
-toRomanNumeral x
- | x >= 4000 || x < 0 = "?"
- | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
- | x >= 900 = "CM" ++ toRomanNumeral (x - 900)
- | x >= 500 = "D" ++ toRomanNumeral (x - 500)
- | x >= 400 = "CD" ++ toRomanNumeral (x - 400)
- | x >= 100 = "C" ++ toRomanNumeral (x - 100)
- | x >= 90 = "XC" ++ toRomanNumeral (x - 90)
- | x >= 50 = "L" ++ toRomanNumeral (x - 50)
- | x >= 40 = "XL" ++ toRomanNumeral (x - 40)
- | x >= 10 = "X" ++ toRomanNumeral (x - 10)
- | x == 9 = "IX"
- | x >= 5 = "V" ++ toRomanNumeral (x - 5)
- | x == 4 = "IV"
- | x >= 1 = "I" ++ toRomanNumeral (x - 1)
- | otherwise = ""
-
--- | Escape whitespace and some punctuation characters in URI.
-escapeURI :: String -> String
-escapeURI = escapeURIString (not . needsEscaping)
- where needsEscaping c = isSpace c || c `elem`
- ['<','>','|','"','{','}','[',']','^', '`']
-
-
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
-tabFilter :: Int -- ^ Tab stop
- -> String -- ^ Input
- -> String
-tabFilter tabStop =
- let go _ [] = ""
- go _ ('\n':xs) = '\n' : go tabStop xs
- go _ ('\r':'\n':xs) = '\n' : go tabStop xs
- go _ ('\r':xs) = '\n' : go tabStop xs
- go spsToNextStop ('\t':xs) =
- if tabStop == 0
- then '\t' : go tabStop xs
- else replicate spsToNextStop ' ' ++ go tabStop xs
- go 1 (x:xs) =
- x : go tabStop xs
- go spsToNextStop (x:xs) =
- x : go (spsToNextStop - 1) xs
- in go tabStop
-
---
--- Date/time
---
-
--- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
--- limit years to the range 1601-9999 (ISO 8601 accepts greater than
--- or equal to 1583, but MS Word only accepts dates starting 1601).
-normalizeDate :: String -> Maybe String
-normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
- (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
- where rejectBadYear day = case toGregorian day of
- (y, _, _) | y >= 1601 && y <= 9999 -> Just day
- _ -> Nothing
- parsetimeWith =
-#if MIN_VERSION_time(1,5,0)
- parseTimeM True defaultTimeLocale
-#else
- parseTime defaultTimeLocale
-#endif
- formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
- "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
- "%Y%m%d", "%Y%m", "%Y"]
-
---
--- Pandoc block and inline list processing
---
-
--- | Generate infinite lazy list of markers for an ordered list,
--- depending on list attributes.
-orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
-orderedListMarkers (start, numstyle, numdelim) =
- let singleton c = [c]
- nums = case numstyle of
- DefaultStyle -> map show [start..]
- Example -> map show [start..]
- Decimal -> map show [start..]
- UpperAlpha -> drop (start - 1) $ cycle $
- map singleton ['A'..'Z']
- LowerAlpha -> drop (start - 1) $ cycle $
- map singleton ['a'..'z']
- UpperRoman -> map toRomanNumeral [start..]
- LowerRoman -> map (map toLower . toRomanNumeral) [start..]
- inDelim str = case numdelim of
- DefaultDelim -> str ++ "."
- Period -> str ++ "."
- OneParen -> str ++ ")"
- TwoParens -> "(" ++ str ++ ")"
- in map inDelim nums
-
--- | Normalize a list of inline elements: remove leading and trailing
--- @Space@ elements, collapse double @Space@s into singles, and
--- remove empty Str elements.
-normalizeSpaces :: [Inline] -> [Inline]
-normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
- where cleanup [] = []
- cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
- [] -> []
- (x:xs) -> Space : x : cleanup xs
- cleanup ((Str ""):rest) = cleanup rest
- cleanup (x:rest) = x : cleanup rest
-
-isSpaceOrEmpty :: Inline -> Bool
-isSpaceOrEmpty Space = True
-isSpaceOrEmpty (Str "") = True
-isSpaceOrEmpty _ = False
-
--- | Extract the leading and trailing spaces from inside an inline element
--- and place them outside the element. SoftBreaks count as Spaces for
--- these purposes.
-extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-extractSpaces f is =
- let contents = B.unMany is
- left = case viewl contents of
- (Space :< _) -> B.space
- (SoftBreak :< _) -> B.softbreak
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> B.space
- (_ :> SoftBreak) -> B.softbreak
- _ -> mempty in
- (left <> f (B.trimInlines . B.Many $ contents) <> right)
-
--- | Extract inlines, removing formatting.
-removeFormatting :: Walkable Inline a => a -> [Inline]
-removeFormatting = query go . walk deNote
- where go :: Inline -> [Inline]
- go (Str xs) = [Str xs]
- go Space = [Space]
- go SoftBreak = [SoftBreak]
- go (Code _ x) = [Str x]
- go (Math _ x) = [Str x]
- go LineBreak = [Space]
- go _ = []
-
-deNote :: Inline -> Inline
-deNote (Note _) = Str ""
-deNote x = x
-
--- | Convert pandoc structure to a string with formatting removed.
--- Footnotes are skipped (since we don't want their contents in link
--- labels).
-stringify :: Walkable Inline a => a -> String
-stringify = query go . walk deNote
- where go :: Inline -> [Char]
- go Space = " "
- go SoftBreak = " "
- go (Str x) = x
- go (Code _ x) = x
- go (Math _ x) = x
- go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
- go LineBreak = " "
- go _ = ""
-
--- | Bring all regular text in a pandoc structure to uppercase.
---
--- This function correctly handles cases where a lowercase character doesn't
--- match to a single uppercase character – e.g. “Straße” would be converted
--- to “STRASSE”, not “STRAßE”.
-capitalize :: Walkable Inline a => a -> a
-capitalize = walk go
- where go :: Inline -> Inline
- go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
- go x = x
-
--- | Change final list item from @Para@ to @Plain@ if the list contains
--- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
--- than @[Block]@.
-compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
- -> [Blocks]
-compactify [] = []
-compactify items =
- let (others, final) = (init items, last items)
- in case reverse (B.toList final) of
- (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
- -- if this is only Para, change to Plain
- [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
- _ -> items
- _ -> items
-
--- | Like @compactify@, but acts on items of definition lists.
-compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
-compactifyDL items =
- let defs = concatMap snd items
- in case reverse (concatMap B.toList defs) of
- (Para x:xs)
- | not (any isPara xs) ->
- let (t,ds) = last items
- lastDef = B.toList $ last ds
- ds' = init ds ++
- if null lastDef
- then [B.fromList lastDef]
- else [B.fromList $ init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- | otherwise -> items
- _ -> items
-
--- | Combine a list of lines by adding hard linebreaks.
-combineLines :: [[Inline]] -> [Inline]
-combineLines = intercalate [LineBreak]
-
--- | Convert a list of lines into a paragraph with hard line breaks. This is
--- useful e.g. for rudimentary support of LineBlock elements in writers.
-linesToPara :: [[Inline]] -> Block
-linesToPara = Para . combineLines
-
-isPara :: Block -> Bool
-isPara (Para _) = True
-isPara _ = False
-
--- | Data structure for defining hierarchical Pandoc documents
-data Element = Blk Block
- | Sec Int [Int] Attr [Inline] [Element]
- -- lvl num attributes label contents
- deriving (Eq, Read, Show, Typeable, Data)
-
-instance Walkable Inline Element where
- walk f (Blk x) = Blk (walk f x)
- walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
- walkM f (Blk x) = Blk `fmap` walkM f x
- walkM f (Sec lev nums attr ils elts) = do
- ils' <- walkM f ils
- elts' <- walkM f elts
- return $ Sec lev nums attr ils' elts'
- query f (Blk x) = query f x
- query f (Sec _ _ _ ils elts) = query f ils <> query f elts
-
-instance Walkable Block Element where
- walk f (Blk x) = Blk (walk f x)
- walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
- walkM f (Blk x) = Blk `fmap` walkM f x
- walkM f (Sec lev nums attr ils elts) = do
- ils' <- walkM f ils
- elts' <- walkM f elts
- return $ Sec lev nums attr ils' elts'
- query f (Blk x) = query f x
- query f (Sec _ _ _ ils elts) = query f ils <> query f elts
-
-
--- | Convert Pandoc inline list to plain text identifier. HTML
--- identifiers must start with a letter, and may contain only
--- letters, digits, and the characters _-.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier =
- dropWhile (not . isAlpha) . intercalate "-" . words .
- map (nbspToSp . toLower) .
- filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
- stringify
- where nbspToSp '\160' = ' '
- nbspToSp x = x
-
--- | Convert list of Pandoc blocks into (hierarchical) list of Elements
-hierarchicalize :: [Block] -> [Element]
-hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
-
-hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
-hierarchicalizeWithIds [] = return []
-hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
- lastnum <- S.get
- let lastnum' = take level lastnum
- let newnum = case length lastnum' of
- x | "unnumbered" `elem` classes -> []
- | x >= level -> init lastnum' ++ [last lastnum' + 1]
- | otherwise -> lastnum ++
- replicate (level - length lastnum - 1) 0 ++ [1]
- unless (null newnum) $ S.put newnum
- let (sectionContents, rest) = break (headerLtEq level) xs
- sectionContents' <- hierarchicalizeWithIds sectionContents
- rest' <- hierarchicalizeWithIds rest
- return $ Sec level newnum attr title' sectionContents' : rest'
-hierarchicalizeWithIds ((Div ("",["references"],[])
- (Header level (ident,classes,kvs) title' : xs)):ys) =
- hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
- title') : (xs ++ ys))
-hierarchicalizeWithIds (x:rest) = do
- rest' <- hierarchicalizeWithIds rest
- return $ (Blk x) : rest'
-
-headerLtEq :: Int -> Block -> Bool
-headerLtEq level (Header l _ _) = l <= level
-headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
-headerLtEq _ _ = False
-
--- | Generate a unique identifier from a list of inlines.
--- Second argument is a list of already used identifiers.
-uniqueIdent :: [Inline] -> Set.Set String -> String
-uniqueIdent title' usedIdents
- = let baseIdent = case inlineListToIdentifier title' of
- "" -> "section"
- x -> x
- numIdent n = baseIdent ++ "-" ++ show n
- in if baseIdent `Set.member` usedIdents
- then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
- Just x -> numIdent x
- Nothing -> baseIdent -- if we have more than 60,000, allow repeats
- else baseIdent
-
--- | True if block is a Header block.
-isHeaderBlock :: Block -> Bool
-isHeaderBlock (Header _ _ _) = True
-isHeaderBlock _ = False
-
--- | Shift header levels up or down.
-headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = walk shift
- where shift :: Block -> Block
- shift (Header level attr inner) = Header (level + n) attr inner
- shift x = x
-
--- | Detect if a list is tight.
-isTightList :: [[Block]] -> Bool
-isTightList = all firstIsPlain
- where firstIsPlain (Plain _ : _) = True
- firstIsPlain _ = False
-
--- | Set a field of a 'Meta' object. If the field already has a value,
--- convert it into a list with the new value appended to the old value(s).
-addMetaField :: ToMetaValue a
- => String
- -> a
- -> Meta
- -> Meta
-addMetaField key val (Meta meta) =
- Meta $ M.insertWith combine key (toMetaValue val) meta
- where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
- combine newval x = MetaList [x, newval]
- tolist (MetaList ys) = ys
- tolist y = [y]
-
--- | Create 'Meta' from old-style title, authors, date. This is
--- provided to ease the transition from the old API.
-makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
-makeMeta title authors date =
- addMetaField "title" (B.fromList title)
- $ addMetaField "author" (map B.fromList authors)
- $ addMetaField "date" (B.fromList date)
- $ nullMeta
-
---
--- TagSoup HTML handling
---
-
--- | Render HTML tags.
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = matchTags ["hr", "br", "img",
- "meta", "link"]
- , optRawTag = matchTags ["script", "style"] }
- where matchTags = \tags -> flip elem tags . map toLower
-
---
--- File handling
---
-
--- | Perform an IO action in a directory, returning to starting directory.
-inDirectory :: FilePath -> IO a -> IO a
-inDirectory path action = E.bracket
- getCurrentDirectory
- setCurrentDirectory
- (const $ setCurrentDirectory path >> action)
-
-getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
-getDefaultReferenceDocx datadir = do
- let paths = ["[Content_Types].xml",
- "_rels/.rels",
- "docProps/app.xml",
- "docProps/core.xml",
- "word/document.xml",
- "word/fontTable.xml",
- "word/footnotes.xml",
- "word/numbering.xml",
- "word/settings.xml",
- "word/webSettings.xml",
- "word/styles.xml",
- "word/_rels/document.xml.rels",
- "word/_rels/footnotes.xml.rels",
- "word/theme/theme1.xml"]
- let toLazy = fromChunks . (:[])
- let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
- getCurrentTime
- contents <- toLazy <$> readDataFile datadir
- ("docx/" ++ path)
- return $ toEntry path epochtime contents
- mbArchive <- case datadir of
- Nothing -> return Nothing
- Just d -> do
- exists <- doesFileExist (d </> "reference.docx")
- if exists
- then return (Just (d </> "reference.docx"))
- else return Nothing
- case mbArchive of
- Just arch -> toArchive <$> BL.readFile arch
- Nothing -> foldr addEntryToArchive emptyArchive <$>
- mapM pathToEntry paths
-
-getDefaultReferenceODT :: Maybe FilePath -> IO Archive
-getDefaultReferenceODT datadir = do
- let paths = ["mimetype",
- "manifest.rdf",
- "styles.xml",
- "content.xml",
- "meta.xml",
- "settings.xml",
- "Configurations2/accelerator/current.xml",
- "Thumbnails/thumbnail.png",
- "META-INF/manifest.xml"]
- let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
- contents <- (fromChunks . (:[])) `fmap`
- readDataFile datadir ("odt/" ++ path)
- return $ toEntry path epochtime contents
- mbArchive <- case datadir of
- Nothing -> return Nothing
- Just d -> do
- exists <- doesFileExist (d </> "reference.odt")
- if exists
- then return (Just (d </> "reference.odt"))
- else return Nothing
- case mbArchive of
- Just arch -> toArchive <$> BL.readFile arch
- Nothing -> foldr addEntryToArchive emptyArchive <$>
- mapM pathToEntry paths
-
-
-readDefaultDataFile :: FilePath -> IO BS.ByteString
-readDefaultDataFile "reference.docx" =
- (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
-readDefaultDataFile "reference.odt" =
- (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
-readDefaultDataFile fname =
-#ifdef EMBED_DATA_FILES
- case lookup (makeCanonical fname) dataFiles of
- Nothing -> err 97 $ "Could not find data file " ++ fname
- Just contents -> return contents
- where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- transformPathParts = reverse . foldl go []
- go as "." = as
- go (_:as) ".." = as
- go as x = x : as
-#else
- getDataFileName fname' >>= checkExistence >>= BS.readFile
- where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-
-checkExistence :: FilePath -> IO FilePath
-checkExistence fn = do
- exists <- doesFileExist fn
- if exists
- then return fn
- else err 97 ("Could not find data file " ++ fn)
-#endif
-
--- | Read file from specified user data directory or, if not found there, from
--- Cabal data directory.
-readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
-readDataFile Nothing fname = readDefaultDataFile fname
-readDataFile (Just userDir) fname = do
- exists <- doesFileExist (userDir </> fname)
- if exists
- then BS.readFile (userDir </> fname)
- else readDefaultDataFile fname
-
--- | Same as 'readDataFile' but returns a String instead of a ByteString.
-readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
-readDataFileUTF8 userDir fname =
- UTF8.toString `fmap` readDataFile userDir fname
-
--- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-openURL u
- | Just u'' <- stripPrefix "data:" u =
- let mime = takeWhile (/=',') u''
- contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
- in return $ Right (decodeLenient contents, Just mime)
-#ifdef HTTP_CLIENT
- | otherwise = withSocketsDo $ E.try $ do
- let parseReq = parseRequest
- (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
- (useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT"
- req <- parseReq u
- req' <- case proxy of
- Left _ -> return req
- Right pr -> (parseReq pr >>= \r ->
- return $ addProxy (host r) (port r) req)
- `mplus` return req
- req'' <- case useragent of
- Left _ -> return req'
- Right ua -> do
- let headers = requestHeaders req'
- let useragentheader = (hUserAgent, B8.pack ua)
- let headers' = useragentheader:headers
- return $ req' {requestHeaders = headers'}
- resp <- newManager tlsManagerSettings >>= httpLbs req''
- return (BS.concat $ toChunks $ responseBody resp,
- UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
-#else
- | otherwise = E.try $ getBodyAndMimeType `fmap` browse
- (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
- setOutHandler $ const (return ())
- setAllowRedirects True
- request (getRequest' u'))
- where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
- getRequest' uriString = case parseURI uriString of
- Nothing -> error ("Not a valid URL: " ++
- uriString)
- Just v -> mkRequest GET v
- u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
-#endif
-
---
--- Error reporting
---
-
-err :: MonadIO m => Int -> String -> m a
-err exitCode msg = liftIO $ do
- UTF8.hPutStrLn stderr msg
- exitWith $ ExitFailure exitCode
- return undefined
-
-warn :: MonadIO m => String -> m ()
-warn msg = liftIO $ do
- UTF8.hPutStrLn stderr $ "[warning] " ++ msg
-
-mapLeft :: (a -> b) -> Either a c -> Either b c
-mapLeft f (Left x) = Left (f x)
-mapLeft _ (Right x) = Right x
-
--- | Remove intermediate "." and ".." directories from a path.
---
--- > collapseFilePath "./foo" == "foo"
--- > collapseFilePath "/bar/../baz" == "/baz"
--- > collapseFilePath "/../baz" == "/../baz"
--- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
--- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
--- > collapseFilePath "parent/foo/.." == "parent"
--- > collapseFilePath "/parent/foo/../../bar" == "/bar"
-collapseFilePath :: FilePath -> FilePath
-collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
- where
- go rs "." = rs
- go r@(p:rs) ".." = case p of
- ".." -> ("..":r)
- (checkPathSeperator -> Just True) -> ("..":r)
- _ -> rs
- go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
- go rs x = x:rs
- isSingleton [] = Nothing
- isSingleton [x] = Just x
- isSingleton _ = Nothing
- checkPathSeperator = fmap isPathSeparator . isSingleton
-
---
--- File selection from the archive
---
-filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
-filteredFilesFromArchive zf f =
- mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
- where
- fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
- fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
-
----
---- Squash blocks into inlines
----
-
-blockToInlines :: Block -> [Inline]
-blockToInlines (Plain ils) = ils
-blockToInlines (Para ils) = ils
-blockToInlines (LineBlock lns) = combineLines lns
-blockToInlines (CodeBlock attr str) = [Code attr str]
-blockToInlines (RawBlock fmt str) = [RawInline fmt str]
-blockToInlines (BlockQuote blks) = blocksToInlines blks
-blockToInlines (OrderedList _ blkslst) =
- concatMap blocksToInlines blkslst
-blockToInlines (BulletList blkslst) =
- concatMap blocksToInlines blkslst
-blockToInlines (DefinitionList pairslst) =
- concatMap f pairslst
- where
- f (ils, blkslst) = ils ++
- [Str ":", Space] ++
- (concatMap blocksToInlines blkslst)
-blockToInlines (Header _ _ ils) = ils
-blockToInlines (HorizontalRule) = []
-blockToInlines (Table _ _ _ headers rows) =
- intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
- where
- tbl = headers : rows
-blockToInlines (Div _ blks) = blocksToInlines blks
-blockToInlines Null = []
-
-blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
-blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
-
-blocksToInlines :: [Block] -> [Inline]
-blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
-
-
---
--- Safe read
---
-
-safeRead :: (MonadPlus m, Read a) => String -> m a
-safeRead s = case reads s of
- (d,x):_
- | all isSpace x -> return d
- _ -> mzero
-
---
--- Temp directory
---
-
-withTempDir :: String -> (FilePath -> IO a) -> IO a
-withTempDir =
-#ifdef _WINDOWS
- withTempDirectory "."
-#else
- withSystemTempDirectory
-#endif
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
deleted file mode 100644
index e19dba3e2..000000000
--- a/src/Text/Pandoc/Slides.hs
+++ /dev/null
@@ -1,63 +0,0 @@
-{-
-Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Slides
- Copyright : Copyright (C) 2012-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Utility functions for splitting documents into slides for slide
-show formats (dzslides, revealjs, s5, slidy, slideous, beamer).
--}
-module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where
-import Text.Pandoc.Definition
-
--- | Find level of header that starts slides (defined as the least header
--- level that occurs before a non-header/non-hrule in the blocks).
-getSlideLevel :: [Block] -> Int
-getSlideLevel = go 6
- where go least (Header n _ _ : x : xs)
- | n < least && nonHOrHR x = go n xs
- | otherwise = go least (x:xs)
- go least (_ : xs) = go least xs
- go least [] = least
- nonHOrHR (Header{}) = False
- nonHOrHR (HorizontalRule) = False
- nonHOrHR _ = True
-
--- | Prepare a block list to be passed to hierarchicalize.
-prepSlides :: Int -> [Block] -> [Block]
-prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader
- where splitHrule (HorizontalRule : Header n attr xs : ys)
- | n == slideLevel = Header slideLevel attr xs : splitHrule ys
- splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] :
- splitHrule xs
- splitHrule (x : xs) = x : splitHrule xs
- splitHrule [] = []
- extractRefsHeader bs =
- case reverse bs of
- (Div ("",["references"],[]) (Header n attrs xs : ys) : zs)
- -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys])
- _ -> bs
- ensureStartWithH bs@(Header n _ _:_)
- | n <= slideLevel = bs
- ensureStartWithH bs = Header slideLevel nullAttr [Str "\0"] : bs
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
deleted file mode 100644
index 705ac54c9..000000000
--- a/src/Text/Pandoc/Templates.hs
+++ /dev/null
@@ -1,77 +0,0 @@
-{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
- OverloadedStrings, GeneralizedNewtypeDeriving #-}
-{-
-Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Templates
- Copyright : Copyright (C) 2009-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-A simple templating system with variable substitution and conditionals.
-
--}
-
-module Text.Pandoc.Templates ( renderTemplate
- , renderTemplate'
- , TemplateTarget
- , varListToJSON
- , compileTemplate
- , Template
- , getDefaultTemplate ) where
-
-import Text.DocTemplates (Template, TemplateTarget, compileTemplate,
- renderTemplate, applyTemplate,
- varListToJSON)
-import Data.Aeson (ToJSON(..))
-import qualified Data.Text as T
-import System.FilePath ((</>), (<.>))
-import qualified Control.Exception.Extensible as E (try, IOException)
-import Text.Pandoc.Shared (readDataFileUTF8)
-
--- | Get default template for the specified writer.
-getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
- -> String -- ^ Name of writer
- -> IO (Either E.IOException String)
-getDefaultTemplate user writer = do
- let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
- case format of
- "native" -> return $ Right ""
- "json" -> return $ Right ""
- "docx" -> return $ Right ""
- "fb2" -> return $ Right ""
- "odt" -> getDefaultTemplate user "opendocument"
- "html" -> getDefaultTemplate user "html5"
- "docbook" -> getDefaultTemplate user "docbook5"
- "epub" -> getDefaultTemplate user "epub3"
- "markdown_strict" -> getDefaultTemplate user "markdown"
- "multimarkdown" -> getDefaultTemplate user "markdown"
- "markdown_github" -> getDefaultTemplate user "markdown"
- "markdown_mmd" -> getDefaultTemplate user "markdown"
- "markdown_phpextra" -> getDefaultTemplate user "markdown"
- _ -> let fname = "templates" </> "default" <.> format
- in E.try $ readDataFileUTF8 user fname
-
--- | Like 'applyTemplate', but raising an error if compilation fails.
-renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b
-renderTemplate' template = either error id . applyTemplate (T.pack template)
-
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
deleted file mode 100644
index 62a662029..000000000
--- a/src/Text/Pandoc/UTF8.hs
+++ /dev/null
@@ -1,121 +0,0 @@
-{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.UTF8
- Copyright : Copyright (C) 2010-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
--}
-module Text.Pandoc.UTF8 ( readFile
- , writeFile
- , getContents
- , putStr
- , putStrLn
- , hPutStr
- , hPutStrLn
- , hGetContents
- , toString
- , fromString
- , toStringLazy
- , fromStringLazy
- , encodePath
- , decodeArg
- )
-
-where
-
-import System.IO hiding (readFile, writeFile, getContents,
- putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
-import qualified System.IO as IO
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.Text.Encoding as T
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-
-readFile :: FilePath -> IO String
-readFile f = do
- h <- openFile (encodePath f) ReadMode
- hGetContents h
-
-writeFile :: FilePath -> String -> IO ()
-writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s
-
-getContents :: IO String
-getContents = hGetContents stdin
-
-putStr :: String -> IO ()
-putStr s = hPutStr stdout s
-
-putStrLn :: String -> IO ()
-putStrLn s = hPutStrLn stdout s
-
-hPutStr :: Handle -> String -> IO ()
-hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s
-
-hPutStrLn :: Handle -> String -> IO ()
-hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
-
-hGetContents :: Handle -> IO String
-hGetContents = fmap toString . B.hGetContents
--- hGetContents h = hSetEncoding h utf8_bom
--- >> hSetNewlineMode h universalNewlineMode
--- >> IO.hGetContents h
-
--- | Drop BOM (byte order marker) if present at beginning of string.
--- Note that Data.Text converts the BOM to code point FEFF, zero-width
--- no-break space, so if the string begins with this we strip it off.
-dropBOM :: String -> String
-dropBOM ('\xFEFF':xs) = xs
-dropBOM xs = xs
-
-filterCRs :: String -> String
-filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
-filterCRs ('\r':xs) = '\n' : filterCRs xs
-filterCRs (x:xs) = x : filterCRs xs
-filterCRs [] = []
-
--- | Convert UTF8-encoded ByteString to String, also
--- removing '\r' characters.
-toString :: B.ByteString -> String
-toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
-
-fromString :: String -> B.ByteString
-fromString = T.encodeUtf8 . T.pack
-
--- | Convert UTF8-encoded ByteString to String, also
--- removing '\r' characters.
-toStringLazy :: BL.ByteString -> String
-toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
-
-fromStringLazy :: String -> BL.ByteString
-fromStringLazy = TL.encodeUtf8 . TL.pack
-
-encodePath :: FilePath -> FilePath
-encodePath = id
-
-decodeArg :: String -> String
-decodeArg = id
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
deleted file mode 100644
index 8de102742..000000000
--- a/src/Text/Pandoc/UUID.hs
+++ /dev/null
@@ -1,78 +0,0 @@
-{-
-Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.UUID
- Copyright : Copyright (C) 2010-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-UUID generation using Version 4 (random method) described
-in RFC4122. See http://tools.ietf.org/html/rfc4122
--}
-
-module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where
-
-import Text.Printf ( printf )
-import System.Random ( RandomGen, randoms, getStdGen )
-import Data.Word
-import Data.Bits ( setBit, clearBit )
-
-data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
- Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
-
-instance Show UUID where
- show (UUID a b c d e f g h i j k l m n o p) =
- "urn:uuid:" ++
- printf "%02x" a ++
- printf "%02x" b ++
- printf "%02x" c ++
- printf "%02x" d ++
- "-" ++
- printf "%02x" e ++
- printf "%02x" f ++
- "-" ++
- printf "%02x" g ++
- printf "%02x" h ++
- "-" ++
- printf "%02x" i ++
- printf "%02x" j ++
- "-" ++
- printf "%02x" k ++
- printf "%02x" l ++
- printf "%02x" m ++
- printf "%02x" n ++
- printf "%02x" o ++
- printf "%02x" p
-
-getUUID :: RandomGen g => g -> UUID
-getUUID gen =
- let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8]
- -- set variant
- i' = i `setBit` 7 `clearBit` 6
- -- set version (0100 for random)
- g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
- in
- UUID a b c d e f g' h i' j k l m n o p
-
-getRandomUUID :: IO UUID
-getRandomUUID = getUUID <$> getStdGen
-
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
deleted file mode 100644
index 356b29504..000000000
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ /dev/null
@@ -1,470 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.AsciiDoc
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to asciidoc.
-
-Note that some information may be lost in conversion, due to
-expressive limitations of asciidoc. Footnotes and table cells with
-paragraphs (or other block items) are not possible in asciidoc.
-If pandoc encounters one of these, it will insert a message indicating
-that it has omitted the construct.
-
-AsciiDoc: <http://www.methods.co.nz/asciidoc/>
--}
-module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (blankline, space)
-import Data.Maybe (fromMaybe)
-import Data.List ( stripPrefix, intersperse, intercalate )
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Control.Monad.State
-import qualified Data.Map as M
-import Data.Aeson (Value(String), fromJSON, toJSON, Result(..))
-import qualified Data.Text as T
-import Data.Char (isSpace, isPunctuation)
-import Text.Pandoc.Class (PandocMonad)
-
-data WriterState = WriterState { defListMarker :: String
- , orderedListLevel :: Int
- , bulletListLevel :: Int
- , intraword :: Bool
- }
-
--- | Convert Pandoc to AsciiDoc.
-writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeAsciiDoc opts document = return $
- evalState (pandocToAsciiDoc opts document) WriterState{
- defListMarker = "::"
- , orderedListLevel = 1
- , bulletListLevel = 1
- , intraword = False
- }
-
--- | Return asciidoc representation of document.
-pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String
-pandocToAsciiDoc opts (Pandoc meta blocks) = do
- let titleblock = not $ null (docTitle meta) && null (docAuthors meta) &&
- null (docDate meta)
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToAsciiDoc opts)
- (fmap (render colwidth) . inlineListToAsciiDoc opts)
- meta
- let addTitleLine (String t) = String $
- t <> "\n" <> T.replicate (T.length t) "="
- addTitleLine x = x
- let metadata' = case fromJSON metadata of
- Success m -> toJSON $ M.adjust addTitleLine
- ("title" :: T.Text) m
- _ -> metadata
- body <- blockListToAsciiDoc opts blocks
- let main = render colwidth body
- let context = defField "body" main
- $ defField "toc"
- (writerTableOfContents opts &&
- writerTemplate opts /= Nothing)
- $ defField "titleblock" titleblock
- $ metadata'
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Escape special characters for AsciiDoc.
-escapeString :: String -> String
-escapeString = escapeStringUsing escs
- where escs = backslashEscapes "{"
-
--- | Ordered list start parser for use in Para below.
-olMarker :: Parser [Char] ParserState Char
-olMarker = do (start, style', delim) <- anyOrderedListMarker
- if delim == Period &&
- (style' == UpperAlpha || (style' == UpperRoman &&
- start `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then spaceChar >> spaceChar
- else spaceChar
-
--- | True if string begins with an ordered list marker
-beginsWithOrderedListMarker :: String -> Bool
-beginsWithOrderedListMarker str =
- case runParser olMarker defaultParserState "para start" (take 10 str) of
- Left _ -> False
- Right _ -> True
-
--- | Convert Pandoc block element to asciidoc.
-blockToAsciiDoc :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState Doc
-blockToAsciiDoc _ Null = return empty
-blockToAsciiDoc opts (Plain inlines) = do
- contents <- inlineListToAsciiDoc opts inlines
- return $ contents <> blankline
-blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
- blockToAsciiDoc opts (Para [Image attr alt (src,tit)])
-blockToAsciiDoc opts (Para inlines) = do
- contents <- inlineListToAsciiDoc opts inlines
- -- escape if para starts with ordered list marker
- let esc = if beginsWithOrderedListMarker (render Nothing contents)
- then text "\\"
- else empty
- return $ esc <> contents <> blankline
-blockToAsciiDoc opts (LineBlock lns) = do
- let docify line = if null line
- then return blankline
- else inlineListToAsciiDoc opts line
- let joinWithLinefeeds = nowrap . mconcat . intersperse cr
- contents <- joinWithLinefeeds <$> mapM docify lns
- return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline
-blockToAsciiDoc _ (RawBlock f s)
- | f == "asciidoc" = return $ text s
- | otherwise = return empty
-blockToAsciiDoc _ HorizontalRule =
- return $ blankline <> text "'''''" <> blankline
-blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
- contents <- inlineListToAsciiDoc opts inlines
- let len = offset contents
- -- ident seem to be empty most of the time and asciidoc will generate them automatically
- -- so lets make them not show up when null
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
- let setext = writerSetextHeaders opts
- return $
- (if setext
- then
- identifier $$ contents $$
- (case level of
- 1 -> text $ replicate len '-'
- 2 -> text $ replicate len '~'
- 3 -> text $ replicate len '^'
- 4 -> text $ replicate len '+'
- _ -> empty) <> blankline
- else
- identifier $$ text (replicate level '=') <> space <> contents <> blankline)
-blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $
- if null classes
- then "...." $$ text str $$ "...."
- else attrs $$ "----" $$ text str $$ "----")
- <> blankline
- where attrs = "[" <> text (intercalate "," ("source" : classes)) <> "]"
-blockToAsciiDoc opts (BlockQuote blocks) = do
- contents <- blockListToAsciiDoc opts blocks
- let isBlock (BlockQuote _) = True
- isBlock _ = False
- -- if there are nested block quotes, put in an open block
- let contents' = if any isBlock blocks
- then "--" $$ contents $$ "--"
- else contents
- let cols = offset contents'
- let bar = text $ replicate cols '_'
- return $ bar $$ chomp contents' $$ bar <> blankline
-blockToAsciiDoc opts (Table caption aligns widths headers rows) = do
- caption' <- inlineListToAsciiDoc opts caption
- let caption'' = if null caption
- then empty
- else "." <> caption' <> cr
- let isSimple = all (== 0) widths
- let relativePercentWidths = if isSimple
- then widths
- else map (/ (sum widths)) widths
- let widths'' :: [Integer]
- widths'' = map (floor . (* 100)) relativePercentWidths
- -- ensure that the widths sum to 100
- let widths' = case widths'' of
- _ | isSimple -> widths''
- (w:ws) | sum (w:ws) < 100
- -> (100 - sum ws) : ws
- ws -> ws
- let totalwidth :: Integer
- totalwidth = floor $ sum widths * 100
- let colspec al wi = (case al of
- AlignLeft -> "<"
- AlignCenter -> "^"
- AlignRight -> ">"
- AlignDefault -> "") ++
- if wi == 0 then "" else (show wi ++ "%")
- let headerspec = if all null headers
- then empty
- else text "options=\"header\","
- let widthspec = if totalwidth == 0
- then empty
- else text "width="
- <> doubleQuotes (text $ show totalwidth ++ "%")
- <> text ","
- let tablespec = text "["
- <> widthspec
- <> text "cols="
- <> doubleQuotes (text $ intercalate ","
- $ zipWith colspec aligns widths')
- <> text ","
- <> headerspec <> text "]"
- let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
- return $ text "|" <> chomp d
- makeCell [Para x] = makeCell [Plain x]
- makeCell [] = return $ text "|"
- makeCell bs = do d <- blockListToAsciiDoc opts bs
- return $ text "a|" $$ d
- let makeRow cells = hsep `fmap` mapM makeCell cells
- rows' <- mapM makeRow rows
- head' <- makeRow headers
- let head'' = if all null headers then empty else head'
- let colwidth = if writerWrapText opts == WrapAuto
- then writerColumns opts
- else 100000
- let maxwidth = maximum $ map offset (head':rows')
- let body = if maxwidth > colwidth then vsep rows' else vcat rows'
- let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '='
- return $
- caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
-blockToAsciiDoc opts (BulletList items) = do
- contents <- mapM (bulletListItemToAsciiDoc opts) items
- return $ cat contents <> blankline
-blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
- let sty' = case sty of
- UpperRoman -> UpperAlpha
- LowerRoman -> LowerAlpha
- x -> x
- let markers = orderedListMarkers (1, sty', Period) -- start num not used
- let markers' = map (\m -> if length m < 3
- then m ++ replicate (3 - length m) ' '
- else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $
- zip markers' items
- return $ cat contents <> blankline
-blockToAsciiDoc opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToAsciiDoc opts) items
- return $ cat contents <> blankline
-blockToAsciiDoc opts (Div (ident,_,_) bs) = do
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
- contents <- blockListToAsciiDoc opts bs
- return $ identifier $$ contents
-
--- | Convert bullet list item (list of blocks) to asciidoc.
-bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
-bulletListItemToAsciiDoc opts blocks = do
- let addBlock :: Doc -> Block -> State WriterState Doc
- addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
- addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> chomp x
- addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> chomp x
- addBlock d b = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> text "+" <> cr <> chomp x
- lev <- bulletListLevel `fmap` get
- modify $ \s -> s{ bulletListLevel = lev + 1 }
- contents <- foldM addBlock empty blocks
- modify $ \s -> s{ bulletListLevel = lev }
- let marker = text (replicate lev '*')
- return $ marker <> text " " <> contents <> cr
-
--- | Convert ordered list item (a list of blocks) to asciidoc.
-orderedListItemToAsciiDoc :: WriterOptions -- ^ options
- -> String -- ^ list item marker
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToAsciiDoc opts marker blocks = do
- let addBlock :: Doc -> Block -> State WriterState Doc
- addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
- addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> chomp x
- addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> chomp x
- addBlock d b = do x <- blockToAsciiDoc opts b
- return $ d <> cr <> text "+" <> cr <> chomp x
- lev <- orderedListLevel `fmap` get
- modify $ \s -> s{ orderedListLevel = lev + 1 }
- contents <- foldM addBlock empty blocks
- modify $ \s -> s{ orderedListLevel = lev }
- return $ text marker <> text " " <> contents <> cr
-
--- | Convert definition list item (label, list of blocks) to asciidoc.
-definitionListItemToAsciiDoc :: WriterOptions
- -> ([Inline],[[Block]])
- -> State WriterState Doc
-definitionListItemToAsciiDoc opts (label, defs) = do
- labelText <- inlineListToAsciiDoc opts label
- marker <- defListMarker `fmap` get
- if marker == "::"
- then modify (\st -> st{ defListMarker = ";;"})
- else modify (\st -> st{ defListMarker = "::"})
- let divider = cr <> text "+" <> cr
- let defsToAsciiDoc :: [Block] -> State WriterState Doc
- defsToAsciiDoc ds = (vcat . intersperse divider . map chomp)
- `fmap` mapM (blockToAsciiDoc opts) ds
- defs' <- mapM defsToAsciiDoc defs
- modify (\st -> st{ defListMarker = marker })
- let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs'
- return $ labelText <> text marker <> cr <> contents <> cr
-
--- | Convert list of Pandoc block elements to asciidoc.
-blockListToAsciiDoc :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
-
-data SpacyLocation = End | Start
-
--- | Convert list of Pandoc inline elements to asciidoc.
-inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToAsciiDoc opts lst = do
- oldIntraword <- gets intraword
- setIntraword False
- result <- go lst
- setIntraword oldIntraword
- return result
- where go [] = return empty
- go (y:x:xs)
- | not (isSpacy End y) = do
- y' <- if isSpacy Start x
- then inlineToAsciiDoc opts y
- else withIntraword $ inlineToAsciiDoc opts y
- x' <- withIntraword $ inlineToAsciiDoc opts x
- xs' <- go xs
- return (y' <> x' <> xs')
- | not (isSpacy Start x) = do
- y' <- withIntraword $ inlineToAsciiDoc opts y
- xs' <- go (x:xs)
- return (y' <> xs')
- go (x:xs) = do
- x' <- inlineToAsciiDoc opts x
- xs' <- go xs
- return (x' <> xs')
- isSpacy :: SpacyLocation -> Inline -> Bool
- isSpacy _ Space = True
- isSpacy _ LineBreak = True
- isSpacy _ SoftBreak = True
- -- Note that \W characters count as spacy in AsciiDoc
- -- for purposes of determining interword:
- isSpacy End (Str xs) = case reverse xs of
- c:_ -> isPunctuation c || isSpace c
- _ -> False
- isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c
- isSpacy _ _ = False
-
-setIntraword :: Bool -> State WriterState ()
-setIntraword b = modify $ \st -> st{ intraword = b }
-
-withIntraword :: State WriterState a -> State WriterState a
-withIntraword p = setIntraword True *> p <* setIntraword False
-
--- | Convert Pandoc inline element to asciidoc.
-inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
-inlineToAsciiDoc opts (Emph lst) = do
- contents <- inlineListToAsciiDoc opts lst
- isIntraword <- gets intraword
- let marker = if isIntraword then "__" else "_"
- return $ marker <> contents <> marker
-inlineToAsciiDoc opts (Strong lst) = do
- contents <- inlineListToAsciiDoc opts lst
- isIntraword <- gets intraword
- let marker = if isIntraword then "**" else "*"
- return $ marker <> contents <> marker
-inlineToAsciiDoc opts (Strikeout lst) = do
- contents <- inlineListToAsciiDoc opts lst
- return $ "[line-through]*" <> contents <> "*"
-inlineToAsciiDoc opts (Superscript lst) = do
- contents <- inlineListToAsciiDoc opts lst
- return $ "^" <> contents <> "^"
-inlineToAsciiDoc opts (Subscript lst) = do
- contents <- inlineListToAsciiDoc opts lst
- return $ "~" <> contents <> "~"
-inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
-inlineToAsciiDoc opts (Quoted SingleQuote lst) =
- inlineListToAsciiDoc opts (Str "`" : lst ++ [Str "'"])
-inlineToAsciiDoc opts (Quoted DoubleQuote lst) =
- inlineListToAsciiDoc opts (Str "``" : lst ++ [Str "''"])
-inlineToAsciiDoc _ (Code _ str) = return $
- text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`"
-inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str
-inlineToAsciiDoc _ (Math InlineMath str) =
- return $ "latexmath:[$" <> text str <> "$]"
-inlineToAsciiDoc _ (Math DisplayMath str) =
- return $ "latexmath:[\\[" <> text str <> "\\]]"
-inlineToAsciiDoc _ (RawInline f s)
- | f == "asciidoc" = return $ text s
- | otherwise = return empty
-inlineToAsciiDoc _ LineBreak = return $ " +" <> cr
-inlineToAsciiDoc _ Space = return space
-inlineToAsciiDoc opts SoftBreak =
- case writerWrapText opts of
- WrapAuto -> return space
- WrapPreserve -> return cr
- WrapNone -> return space
-inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
-inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do
--- relative: link:downloads/foo.zip[download foo.zip]
--- abs: http://google.cod[Google]
--- or my@email.com[email john]
- linktext <- inlineListToAsciiDoc opts txt
- let isRelative = ':' `notElem` src
- let prefix = if isRelative
- then text "link:"
- else empty
- let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
- let useAuto = case txt of
- [Str s] | escapeURI s == srcSuffix -> True
- _ -> False
- return $ if useAuto
- then text srcSuffix
- else prefix <> text src <> "[" <> linktext <> "]"
-inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do
--- image:images/logo.png[Company logo, title="blah"]
- let txt = if (null alternate) || (alternate == [Str ""])
- then [Str "image"]
- else alternate
- linktext <- inlineListToAsciiDoc opts txt
- let linktitle = if null tit
- then empty
- else ",title=\"" <> text tit <> "\""
- showDim dir = case (dimension dir attr) of
- Just (Percent a) ->
- ["scaledwidth=" <> text (show (Percent a))]
- Just dim ->
- [text (show dir) <> "=" <> text (showInPixel opts dim)]
- Nothing ->
- []
- dimList = showDim Width ++ showDim Height
- dims = if null dimList
- then empty
- else "," <> cat (intersperse "," dimList)
- return $ "image:" <> text src <> "[" <> linktext <> linktitle <> dims <> "]"
-inlineToAsciiDoc opts (Note [Para inlines]) =
- inlineToAsciiDoc opts (Note [Plain inlines])
-inlineToAsciiDoc opts (Note [Plain inlines]) = do
- contents <- inlineListToAsciiDoc opts inlines
- return $ text "footnote:[" <> contents <> "]"
--- asciidoc can't handle blank lines in notes
-inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
-inlineToAsciiDoc opts (Span (ident,_,_) ils) = do
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
- contents <- inlineListToAsciiDoc opts ils
- return $ identifier <> contents
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
deleted file mode 100644
index b83f6785d..000000000
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ /dev/null
@@ -1,190 +0,0 @@
-{-
-Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.CommonMark
- Copyright : Copyright (C) 2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to CommonMark.
-
-CommonMark: <http://commonmark.org>
--}
-module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
-
-import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared (isTightList, linesToPara)
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import CMark
-import qualified Data.Text as T
-import Control.Monad.State (runState, State, modify, get)
-import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Class (PandocMonad)
-import Data.Foldable (foldrM)
-
--- | Convert Pandoc to CommonMark.
-writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeCommonMark opts (Pandoc meta blocks) = do
- let (blocks', notes) = runState (walkM processNotes blocks) []
- notes' = if null notes
- then []
- else [OrderedList (1, Decimal, Period) $ reverse notes]
- main <- blocksToCommonMark opts (blocks' ++ notes')
- metadata <- metaToJSON opts
- (blocksToCommonMark opts)
- (inlinesToCommonMark opts)
- meta
- let context = defField "body" main $ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
-processNotes :: Inline -> State [[Block]] Inline
-processNotes (Note bs) = do
- modify (bs :)
- notes <- get
- return $ Str $ "[" ++ show (length notes) ++ "]"
-processNotes x = return x
-
-node :: NodeType -> [Node] -> Node
-node = Node Nothing
-
-blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m String
-blocksToCommonMark opts bs = do
- let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- nodes <- blocksToNodes bs
- return $
- T.unpack $
- nodeToCommonmark cmarkOpts colwidth $
- node DOCUMENT nodes
-
-inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m String
-inlinesToCommonMark opts ils = return $
- T.unpack $ nodeToCommonmark cmarkOpts colwidth
- $ node PARAGRAPH (inlinesToNodes ils)
- where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
-
-blocksToNodes :: PandocMonad m => [Block] -> m [Node]
-blocksToNodes = foldrM blockToNodes []
-
-blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node]
-blockToNodes (Plain xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns)
-blockToNodes (Para xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns)
-blockToNodes (LineBlock lns) ns = blockToNodes (linesToPara lns) ns
-blockToNodes (CodeBlock (_,classes,_) xs) ns = return $
- (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns)
-blockToNodes (RawBlock fmt xs) ns
- | fmt == Format "html" = return (node (HTML_BLOCK (T.pack xs)) [] : ns)
- | otherwise = return (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] : ns)
-blockToNodes (BlockQuote bs) ns = do
- nodes <- blocksToNodes bs
- return (node BLOCK_QUOTE nodes : ns)
-blockToNodes (BulletList items) ns = do
- nodes <- mapM blocksToNodes items
- return (node (LIST ListAttributes{
- listType = BULLET_LIST,
- listDelim = PERIOD_DELIM,
- listTight = isTightList items,
- listStart = 1 }) (map (node ITEM) nodes) : ns)
-blockToNodes (OrderedList (start, _sty, delim) items) ns = do
- nodes <- mapM blocksToNodes items
- return (node (LIST ListAttributes{
- listType = ORDERED_LIST,
- listDelim = case delim of
- OneParen -> PAREN_DELIM
- TwoParens -> PAREN_DELIM
- _ -> PERIOD_DELIM,
- listTight = isTightList items,
- listStart = start }) (map (node ITEM) nodes) : ns)
-blockToNodes HorizontalRule ns = return (node THEMATIC_BREAK [] : ns)
-blockToNodes (Header lev _ ils) ns = return (node (HEADING lev) (inlinesToNodes ils) : ns)
-blockToNodes (Div _ bs) ns = do
- nodes <- blocksToNodes bs
- return (nodes ++ ns)
-blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns
- where items' = map dlToBullet items
- dlToBullet (term, ((Para xs : ys) : zs)) =
- Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
- dlToBullet (term, ((Plain xs : ys) : zs)) =
- Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
- dlToBullet (term, xs) =
- Para term : concat xs
-blockToNodes t@(Table _ _ _ _ _) ns = do
- s <- writeHtml5String def $! Pandoc nullMeta [t]
- return (node (HTML_BLOCK (T.pack $! s)) [] : ns)
-blockToNodes Null ns = return ns
-
-inlinesToNodes :: [Inline] -> [Node]
-inlinesToNodes = foldr inlineToNodes []
-
-inlineToNodes :: Inline -> [Node] -> [Node]
-inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
-inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
-inlineToNodes LineBreak = (node LINEBREAK [] :)
-inlineToNodes SoftBreak = (node SOFTBREAK [] :)
-inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
-inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
-inlineToNodes (Strikeout xs) =
- ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++
- [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
-inlineToNodes (Superscript xs) =
- ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++
- [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
-inlineToNodes (Subscript xs) =
- ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++
- [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
-inlineToNodes (SmallCaps xs) =
- ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) []
- : inlinesToNodes xs ++
- [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
-inlineToNodes (Link _ ils (url,tit)) =
- (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
-inlineToNodes (Image _ ils (url,tit)) =
- (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
-inlineToNodes (RawInline fmt xs)
- | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :)
- | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :)
-inlineToNodes (Quoted qt ils) =
- ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
- where (start, end) = case qt of
- SingleQuote -> (T.pack "‘", T.pack "’")
- DoubleQuote -> (T.pack "“", T.pack "”")
-inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
-inlineToNodes (Math mt str) =
- case mt of
- InlineMath ->
- (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
- DisplayMath ->
- (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
-inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
-inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
-inlineToNodes (Note _) = id -- should not occur
--- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
deleted file mode 100644
index ea8b90db3..000000000
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ /dev/null
@@ -1,481 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.ConTeXt
- Copyright : Copyright (C) 2007-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' format into ConTeXt.
--}
-module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Walk (query)
-import Text.Printf ( printf )
-import Data.List ( intercalate, intersperse )
-import Data.Char ( ord )
-import Data.Maybe ( catMaybes )
-import Control.Monad.State
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Network.URI ( isURI, unEscapeString )
-import Text.Pandoc.Class (PandocMonad)
-
-data WriterState =
- WriterState { stNextRef :: Int -- number of next URL reference
- , stOrderedListLevel :: Int -- level of ordered list
- , stOptions :: WriterOptions -- writer options
- }
-
-orderedListStyles :: [Char]
-orderedListStyles = cycle "narg"
-
--- | Convert Pandoc to ConTeXt.
-writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeConTeXt options document = return $
- let defaultWriterState = WriterState { stNextRef = 1
- , stOrderedListLevel = 0
- , stOptions = options
- }
- in evalState (pandocToConTeXt options document) defaultWriterState
-
-pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
-pandocToConTeXt options (Pandoc meta blocks) = do
- let colwidth = if writerWrapText options == WrapAuto
- then Just $ writerColumns options
- else Nothing
- metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToConTeXt)
- (fmap (render colwidth) . inlineListToConTeXt)
- meta
- body <- mapM (elementToConTeXt options) $ hierarchicalize blocks
- let main = (render colwidth . vcat) body
- let layoutFromMargins = intercalate [','] $ catMaybes $
- map (\(x,y) ->
- ((x ++ "=") ++) <$> getField y metadata)
- [("leftmargin","margin-left")
- ,("rightmargin","margin-right")
- ,("top","margin-top")
- ,("bottom","margin-bottom")
- ]
- let context = defField "toc" (writerTableOfContents options)
- $ defField "placelist" (intercalate ("," :: String) $
- take (writerTOCDepth options +
- case writerTopLevelDivision options of
- TopLevelPart -> 0
- TopLevelChapter -> 0
- _ -> 1)
- ["chapter","section","subsection","subsubsection",
- "subsubsubsection","subsubsubsubsection"])
- $ defField "body" main
- $ defField "layout" layoutFromMargins
- $ defField "number-sections" (writerNumberSections options)
- $ metadata
- let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
- getField "lang" context)
- $ defField "context-dir" (toContextDir $ getField "dir" context)
- $ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
-
-toContextDir :: Maybe String -> String
-toContextDir (Just "rtl") = "r2l"
-toContextDir (Just "ltr") = "l2r"
-toContextDir _ = ""
-
--- | escape things as needed for ConTeXt
-escapeCharForConTeXt :: WriterOptions -> Char -> String
-escapeCharForConTeXt opts ch =
- let ligatures = isEnabled Ext_smart opts in
- case ch of
- '{' -> "\\{"
- '}' -> "\\}"
- '\\' -> "\\letterbackslash{}"
- '$' -> "\\$"
- '|' -> "\\letterbar{}"
- '%' -> "\\letterpercent{}"
- '~' -> "\\lettertilde{}"
- '#' -> "\\#"
- '[' -> "{[}"
- ']' -> "{]}"
- '\160' -> "~"
- '\x2014' | ligatures -> "---"
- '\x2013' | ligatures -> "--"
- '\x2019' | ligatures -> "'"
- '\x2026' -> "\\ldots{}"
- x -> [x]
-
--- | Escape string for ConTeXt
-stringToConTeXt :: WriterOptions -> String -> String
-stringToConTeXt opts = concatMap (escapeCharForConTeXt opts)
-
--- | Sanitize labels
-toLabel :: String -> String
-toLabel z = concatMap go z
- where go x
- | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x)
- | otherwise = [x]
-
--- | Convert Elements to ConTeXt
-elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
-elementToConTeXt _ (Blk block) = blockToConTeXt block
-elementToConTeXt opts (Sec level _ attr title' elements) = do
- header' <- sectionHeader attr level title'
- innerContents <- mapM (elementToConTeXt opts) elements
- return $ vcat (header' : innerContents)
-
--- | Convert Pandoc block element to ConTeXt.
-blockToConTeXt :: Block
- -> State WriterState Doc
-blockToConTeXt Null = return empty
-blockToConTeXt (Plain lst) = inlineListToConTeXt lst
--- title beginning with fig: indicates that the image is a figure
-blockToConTeXt (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
- capt <- inlineListToConTeXt txt
- img <- inlineToConTeXt (Image attr txt (src, ""))
- let (ident, _, _) = attr
- label = if null ident
- then empty
- else "[]" <> brackets (text $ toLabel ident)
- return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline
-blockToConTeXt (Para lst) = do
- contents <- inlineListToConTeXt lst
- return $ contents <> blankline
-blockToConTeXt (LineBlock lns) = do
- doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns
- return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline
-blockToConTeXt (BlockQuote lst) = do
- contents <- blockListToConTeXt lst
- return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
-blockToConTeXt (CodeBlock _ str) =
- return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline
- -- blankline because \stoptyping can't have anything after it, inc. '}'
-blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
-blockToConTeXt (RawBlock _ _ ) = return empty
-blockToConTeXt (Div (ident,_,kvs) bs) = do
- let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
- let wrapRef txt = if null ident
- then txt
- else ("\\reference" <> brackets (text $ toLabel ident) <>
- braces empty <> "%") $$ txt
- wrapDir = case lookup "dir" kvs of
- Just "rtl" -> align "righttoleft"
- Just "ltr" -> align "lefttoright"
- _ -> id
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> "\\start\\language["
- <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
- Nothing -> txt
- wrapBlank txt = blankline <> txt <> blankline
- fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
-blockToConTeXt (BulletList lst) = do
- contents <- mapM listItemToConTeXt lst
- return $ ("\\startitemize" <> if isTightList lst
- then brackets "packed"
- else empty) $$
- vcat contents $$ text "\\stopitemize" <> blankline
-blockToConTeXt (OrderedList (start, style', delim) lst) = do
- st <- get
- let level = stOrderedListLevel st
- put $ st {stOrderedListLevel = level + 1}
- contents <- mapM listItemToConTeXt lst
- put $ st {stOrderedListLevel = level}
- let start' = if start == 1 then "" else "start=" ++ show start
- let delim' = case delim of
- DefaultDelim -> ""
- Period -> "stopper=."
- OneParen -> "stopper=)"
- TwoParens -> "left=(,stopper=)"
- let width = maximum $ map length $ take (length contents)
- (orderedListMarkers (start, style', delim))
- let width' = (toEnum width + 1) / 2
- let width'' = if width' > (1.5 :: Double)
- then "width=" ++ show width' ++ "em"
- else ""
- let specs2Items = filter (not . null) [start', delim', width'']
- let specs2 = if null specs2Items
- then ""
- else "[" ++ intercalate "," specs2Items ++ "]"
- let style'' = '[': (case style' of
- DefaultStyle -> orderedListStyles !! level
- Decimal -> 'n'
- Example -> 'n'
- LowerRoman -> 'r'
- UpperRoman -> 'R'
- LowerAlpha -> 'a'
- UpperAlpha -> 'A') :
- if isTightList lst then ",packed]" else "]"
- let specs = style'' ++ specs2
- return $ "\\startitemize" <> text specs $$ vcat contents $$
- "\\stopitemize" <> blankline
-blockToConTeXt (DefinitionList lst) =
- liftM vcat $ mapM defListItemToConTeXt lst
-blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
--- If this is ever executed, provide a default for the reference identifier.
-blockToConTeXt (Header level attr lst) = sectionHeader attr level lst
-blockToConTeXt (Table caption aligns widths heads rows) = do
- let colDescriptor colWidth alignment = (case alignment of
- AlignLeft -> 'l'
- AlignRight -> 'r'
- AlignCenter -> 'c'
- AlignDefault -> 'l'):
- if colWidth == 0
- then "|"
- else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|")
- let colDescriptors = "|" ++ (concat $
- zipWith colDescriptor widths aligns)
- headers <- if all null heads
- then return empty
- else liftM ($$ "\\HL") $ tableRowToConTeXt heads
- captionText <- inlineListToConTeXt caption
- rows' <- mapM tableRowToConTeXt rows
- return $ "\\placetable" <> (if null caption
- then brackets "none"
- else empty)
- <> braces captionText $$
- "\\starttable" <> brackets (text colDescriptors) $$
- "\\HL" $$ headers $$
- vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline
-
-tableRowToConTeXt :: [[Block]] -> State WriterState Doc
-tableRowToConTeXt cols = do
- cols' <- mapM blockListToConTeXt cols
- return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR"
-
-listItemToConTeXt :: [Block] -> State WriterState Doc
-listItemToConTeXt list = blockListToConTeXt list >>=
- return . ("\\item" $$) . (nest 2)
-
-defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
-defListItemToConTeXt (term, defs) = do
- term' <- inlineListToConTeXt term
- def' <- liftM vsep $ mapM blockListToConTeXt defs
- return $ "\\startdescription" <> braces term' $$ nest 2 def' $$
- "\\stopdescription" <> blankline
-
--- | Convert list of block elements to ConTeXt.
-blockListToConTeXt :: [Block] -> State WriterState Doc
-blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst
-
--- | Convert list of inline elements to ConTeXt.
-inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
- -> State WriterState Doc
-inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst
- -- We add a \strut after a line break that precedes a space,
- -- or the space gets swallowed
- where addStruts (LineBreak : s : xs) | isSpacey s =
- LineBreak : RawInline (Format "context") "\\strut " : s :
- addStruts xs
- addStruts (x:xs) = x : addStruts xs
- addStruts [] = []
- isSpacey Space = True
- isSpacey (Str ('\160':_)) = True
- isSpacey _ = False
-
--- | Convert inline element to ConTeXt
-inlineToConTeXt :: Inline -- ^ Inline to convert
- -> State WriterState Doc
-inlineToConTeXt (Emph lst) = do
- contents <- inlineListToConTeXt lst
- return $ braces $ "\\em " <> contents
-inlineToConTeXt (Strong lst) = do
- contents <- inlineListToConTeXt lst
- return $ braces $ "\\bf " <> contents
-inlineToConTeXt (Strikeout lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\overstrikes" <> braces contents
-inlineToConTeXt (Superscript lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\high" <> braces contents
-inlineToConTeXt (Subscript lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\low" <> braces contents
-inlineToConTeXt (SmallCaps lst) = do
- contents <- inlineListToConTeXt lst
- return $ braces $ "\\sc " <> contents
-inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) =
- return $ "\\type" <> braces (text str)
-inlineToConTeXt (Code _ str) = do
- opts <- gets stOptions
- return $ "\\mono" <> braces (text $ stringToConTeXt opts str)
-inlineToConTeXt (Quoted SingleQuote lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\quote" <> braces contents
-inlineToConTeXt (Quoted DoubleQuote lst) = do
- contents <- inlineListToConTeXt lst
- return $ "\\quotation" <> braces contents
-inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
-inlineToConTeXt (Str str) = do
- opts <- gets stOptions
- return $ text $ stringToConTeXt opts str
-inlineToConTeXt (Math InlineMath str) =
- return $ char '$' <> text str <> char '$'
-inlineToConTeXt (Math DisplayMath str) =
- return $ text "\\startformula " <> text str <> text " \\stopformula" <> space
-inlineToConTeXt (RawInline "context" str) = return $ text str
-inlineToConTeXt (RawInline "tex" str) = return $ text str
-inlineToConTeXt (RawInline _ _) = return empty
-inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
-inlineToConTeXt SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- return $ case wrapText of
- WrapAuto -> space
- WrapNone -> space
- WrapPreserve -> cr
-inlineToConTeXt Space = return space
--- Handle HTML-like internal document references to sections
-inlineToConTeXt (Link _ txt (('#' : ref), _)) = do
- opts <- gets stOptions
- contents <- inlineListToConTeXt txt
- let ref' = toLabel $ stringToConTeXt opts ref
- return $ text "\\goto"
- <> braces contents
- <> brackets (text ref')
-
-inlineToConTeXt (Link _ txt (src, _)) = do
- let isAutolink = txt == [Str (unEscapeString src)]
- st <- get
- let next = stNextRef st
- put $ st {stNextRef = next + 1}
- let ref = "url" ++ show next
- contents <- inlineListToConTeXt txt
- return $ "\\useURL"
- <> brackets (text ref)
- <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
- <> (if isAutolink
- then empty
- else brackets empty <> brackets contents)
- <> "\\from"
- <> brackets (text ref)
-inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do
- opts <- gets stOptions
- let showDim dir = let d = text (show dir) <> "="
- in case (dimension dir attr) of
- Just (Pixel a) ->
- [d <> text (showInInch opts (Pixel a)) <> "in"]
- Just (Percent a) ->
- [d <> text (showFl (a / 100)) <> "\\textwidth"]
- Just dim ->
- [d <> text (show dim)]
- Nothing ->
- []
- dimList = showDim Width ++ showDim Height
- dims = if null dimList
- then empty
- else brackets $ cat (intersperse "," dimList)
- clas = if null cls
- then empty
- else brackets $ text $ toLabel $ head cls
- src' = if isURI src
- then src
- else unEscapeString src
- return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas
-inlineToConTeXt (Note contents) = do
- contents' <- blockListToConTeXt contents
- let codeBlock x@(CodeBlock _ _) = [x]
- codeBlock _ = []
- let codeBlocks = query codeBlock contents
- return $ if null codeBlocks
- then text "\\footnote{" <> nest 2 contents' <> char '}'
- else text "\\startbuffer " <> nest 2 contents' <>
- text "\\stopbuffer\\footnote{\\getbuffer}"
-inlineToConTeXt (Span (_,_,kvs) ils) = do
- let wrapDir txt = case lookup "dir" kvs of
- Just "rtl" -> braces $ "\\righttoleft " <> txt
- Just "ltr" -> braces $ "\\lefttoright " <> txt
- _ -> txt
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
- <> "]" <> txt <> "\\stop "
- Nothing -> txt
- fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
-
--- | Craft the section header, inserting the section reference, if supplied.
-sectionHeader :: Attr
- -> Int
- -> [Inline]
- -> State WriterState Doc
-sectionHeader (ident,classes,_) hdrLevel lst = do
- contents <- inlineListToConTeXt lst
- st <- get
- let opts = stOptions st
- let level' = case writerTopLevelDivision opts of
- TopLevelPart -> hdrLevel - 2
- TopLevelChapter -> hdrLevel - 1
- TopLevelSection -> hdrLevel
- TopLevelDefault -> hdrLevel
- let ident' = toLabel ident
- let (section, chapter) = if "unnumbered" `elem` classes
- then (text "subject", text "title")
- else (text "section", text "chapter")
- return $ case level' of
- -1 -> text "\\part" <> braces contents
- 0 -> char '\\' <> chapter <> braces contents
- n | n >= 1 && n <= 5 -> char '\\'
- <> text (concat (replicate (n - 1) "sub"))
- <> section
- <> (if (not . null) ident'
- then brackets (text ident')
- else empty)
- <> braces contents
- <> blankline
- _ -> contents <> blankline
-
-fromBcp47' :: String -> String
-fromBcp47' = fromBcp47 . splitBy (=='-')
-
--- Takes a list of the constituents of a BCP 47 language code
--- and irons out ConTeXt's exceptions
--- https://tools.ietf.org/html/bcp47#section-2.1
--- http://wiki.contextgarden.net/Language_Codes
-fromBcp47 :: [String] -> String
-fromBcp47 [] = ""
-fromBcp47 ("ar":"SY":_) = "ar-sy"
-fromBcp47 ("ar":"IQ":_) = "ar-iq"
-fromBcp47 ("ar":"JO":_) = "ar-jo"
-fromBcp47 ("ar":"LB":_) = "ar-lb"
-fromBcp47 ("ar":"DZ":_) = "ar-dz"
-fromBcp47 ("ar":"MA":_) = "ar-ma"
-fromBcp47 ("de":"1901":_) = "deo"
-fromBcp47 ("de":"DE":_) = "de-de"
-fromBcp47 ("de":"AT":_) = "de-at"
-fromBcp47 ("de":"CH":_) = "de-ch"
-fromBcp47 ("el":"poly":_) = "agr"
-fromBcp47 ("en":"US":_) = "en-us"
-fromBcp47 ("en":"GB":_) = "en-gb"
-fromBcp47 ("grc":_) = "agr"
-fromBcp47 x = fromIso $ head x
- where
- fromIso "el" = "gr"
- fromIso "eu" = "ba"
- fromIso "he" = "il"
- fromIso "jp" = "ja"
- fromIso "uk" = "ua"
- fromIso "vi" = "vn"
- fromIso "zh" = "cn"
- fromIso l = l
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
deleted file mode 100644
index cf641dcd6..000000000
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ /dev/null
@@ -1,322 +0,0 @@
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE FlexibleInstances, OverloadedStrings,
- ScopedTypeVariables, DeriveDataTypeable, CPP #-}
-#if MIN_VERSION_base(4,8,0)
-#else
-{-# LANGUAGE OverlappingInstances #-}
-#endif
-{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Custom
- Copyright : Copyright (C) 2012-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to custom markup using
-a lua writer.
--}
-module Text.Pandoc.Writers.Custom ( writeCustom ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Data.List ( intersperse )
-import Data.Char ( toLower )
-import Data.Typeable
-import Scripting.Lua (LuaState, StackValue, callfunc)
-import Text.Pandoc.Writers.Shared
-import qualified Scripting.Lua as Lua
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad (when)
-import Control.Exception
-import qualified Data.Map as M
-import Text.Pandoc.Templates
-import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8)
-
-attrToMap :: Attr -> M.Map String String
-attrToMap (id',classes,keyvals) = M.fromList
- $ ("id", id')
- : ("class", unwords classes)
- : keyvals
-
-#if MIN_VERSION_hslua(0,4,0)
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Char] where
-#else
-instance StackValue [Char] where
-#endif
- push lua cs = Lua.push lua (UTF8.fromString cs)
- peek lua i = do
- res <- Lua.peek lua i
- return $ UTF8.toString `fmap` res
- valuetype _ = Lua.TSTRING
-#else
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue a => StackValue [a] where
-#else
-instance StackValue a => StackValue [a] where
-#endif
- push lua xs = do
- Lua.createtable lua (length xs + 1) 0
- let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i
- mapM_ addValue $ zip [1..] xs
- peek lua i = do
- top <- Lua.gettop lua
- let i' = if i < 0 then top + i + 1 else i
- Lua.pushnil lua
- lst <- getList lua i'
- Lua.pop lua 1
- return (Just lst)
- valuetype _ = Lua.TTABLE
-
-getList :: StackValue a => LuaState -> Int -> IO [a]
-getList lua i' = do
- continue <- Lua.next lua i'
- if continue
- then do
- next <- Lua.peek lua (-1)
- Lua.pop lua 1
- x <- maybe (fail "peek returned Nothing") return next
- rest <- getList lua i'
- return (x : rest)
- else return []
-#endif
-
-instance StackValue Format where
- push lua (Format f) = Lua.push lua (map toLower f)
- peek l n = fmap Format `fmap` Lua.peek l n
- valuetype _ = Lua.TSTRING
-
-instance (StackValue a, StackValue b) => StackValue (M.Map a b) where
- push lua m = do
- let xs = M.toList m
- Lua.createtable lua (length xs + 1) 0
- let addValue (k, v) = Lua.push lua k >> Lua.push lua v >>
- Lua.rawset lua (-3)
- mapM_ addValue xs
- peek _ _ = undefined -- not needed for our purposes
- valuetype _ = Lua.TTABLE
-
-instance (StackValue a, StackValue b) => StackValue (a,b) where
- push lua (k,v) = do
- Lua.createtable lua 2 0
- Lua.push lua k
- Lua.push lua v
- Lua.rawset lua (-3)
- peek _ _ = undefined -- not needed for our purposes
- valuetype _ = Lua.TTABLE
-
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Inline] where
-#else
-instance StackValue [Inline] where
-#endif
- push l ils = Lua.push l =<< inlineListToCustom l ils
- peek _ _ = undefined
- valuetype _ = Lua.TSTRING
-
-#if MIN_VERSION_base(4,8,0)
-instance {-# OVERLAPS #-} StackValue [Block] where
-#else
-instance StackValue [Block] where
-#endif
- push l ils = Lua.push l =<< blockListToCustom l ils
- peek _ _ = undefined
- valuetype _ = Lua.TSTRING
-
-instance StackValue MetaValue where
- push l (MetaMap m) = Lua.push l m
- push l (MetaList xs) = Lua.push l xs
- push l (MetaBool x) = Lua.push l x
- push l (MetaString s) = Lua.push l s
- push l (MetaInlines ils) = Lua.push l ils
- push l (MetaBlocks bs) = Lua.push l bs
- peek _ _ = undefined
- valuetype (MetaMap _) = Lua.TTABLE
- valuetype (MetaList _) = Lua.TTABLE
- valuetype (MetaBool _) = Lua.TBOOLEAN
- valuetype (MetaString _) = Lua.TSTRING
- valuetype (MetaInlines _) = Lua.TSTRING
- valuetype (MetaBlocks _) = Lua.TSTRING
-
-instance StackValue Citation where
- push lua cit = do
- Lua.createtable lua 6 0
- let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >>
- Lua.rawset lua (-3)
- addValue ("citationId", citationId cit)
- addValue ("citationPrefix", citationPrefix cit)
- addValue ("citationSuffix", citationSuffix cit)
- addValue ("citationMode", show (citationMode cit))
- addValue ("citationNoteNum", citationNoteNum cit)
- addValue ("citationHash", citationHash cit)
- peek = undefined
- valuetype _ = Lua.TTABLE
-
-data PandocLuaException = PandocLuaException String
- deriving (Show, Typeable)
-
-instance Exception PandocLuaException
-
--- | Convert Pandoc to custom markup.
-writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
-writeCustom luaFile opts doc@(Pandoc meta _) = do
- luaScript <- UTF8.readFile luaFile
- enc <- getForeignEncoding
- setForeignEncoding utf8
- lua <- Lua.newstate
- Lua.openlibs lua
- status <- Lua.loadstring lua luaScript luaFile
- -- check for error in lua script (later we'll change the return type
- -- to handle this more gracefully):
- when (status /= 0) $
-#if MIN_VERSION_hslua(0,4,0)
- Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString
-#else
- Lua.tostring lua 1 >>= throw . PandocLuaException
-#endif
- Lua.call lua 0 0
- -- TODO - call hierarchicalize, so we have that info
- rendered <- docToCustom lua opts doc
- context <- metaToJSON opts
- (blockListToCustom lua)
- (inlineListToCustom lua)
- meta
- Lua.close lua
- setForeignEncoding enc
- let body = rendered
- case writerTemplate opts of
- Nothing -> return body
- Just tpl -> return $ renderTemplate' tpl $ setField "body" body context
-
-docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String
-docToCustom lua opts (Pandoc (Meta metamap) blocks) = do
- body <- blockListToCustom lua blocks
- callfunc lua "Doc" body metamap (writerVariables opts)
-
--- | Convert Pandoc block element to Custom.
-blockToCustom :: LuaState -- ^ Lua state
- -> Block -- ^ Block element
- -> IO String
-
-blockToCustom _ Null = return ""
-
-blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines
-
-blockToCustom lua (Para [Image attr txt (src,tit)]) =
- callfunc lua "CaptionedImage" src tit txt (attrToMap attr)
-
-blockToCustom lua (Para inlines) = callfunc lua "Para" inlines
-
-blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList
-
-blockToCustom lua (RawBlock format str) =
- callfunc lua "RawBlock" format str
-
-blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule"
-
-blockToCustom lua (Header level attr inlines) =
- callfunc lua "Header" level inlines (attrToMap attr)
-
-blockToCustom lua (CodeBlock attr str) =
- callfunc lua "CodeBlock" str (attrToMap attr)
-
-blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks
-
-blockToCustom lua (Table capt aligns widths headers rows') =
- callfunc lua "Table" capt (map show aligns) widths headers rows'
-
-blockToCustom lua (BulletList items) = callfunc lua "BulletList" items
-
-blockToCustom lua (OrderedList (num,sty,delim) items) =
- callfunc lua "OrderedList" items num (show sty) (show delim)
-
-blockToCustom lua (DefinitionList items) =
- callfunc lua "DefinitionList" items
-
-blockToCustom lua (Div attr items) =
- callfunc lua "Div" items (attrToMap attr)
-
--- | Convert list of Pandoc block elements to Custom.
-blockListToCustom :: LuaState -- ^ Options
- -> [Block] -- ^ List of block elements
- -> IO String
-blockListToCustom lua xs = do
- blocksep <- callfunc lua "Blocksep"
- bs <- mapM (blockToCustom lua) xs
- return $ mconcat $ intersperse blocksep bs
-
--- | Convert list of Pandoc inline elements to Custom.
-inlineListToCustom :: LuaState -> [Inline] -> IO String
-inlineListToCustom lua lst = do
- xs <- mapM (inlineToCustom lua) lst
- return $ concat xs
-
--- | Convert Pandoc inline element to Custom.
-inlineToCustom :: LuaState -> Inline -> IO String
-
-inlineToCustom lua (Str str) = callfunc lua "Str" str
-
-inlineToCustom lua Space = callfunc lua "Space"
-
-inlineToCustom lua SoftBreak = callfunc lua "SoftBreak"
-
-inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst
-
-inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst
-
-inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst
-
-inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst
-
-inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst
-
-inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst
-
-inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst
-
-inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst
-
-inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs
-
-inlineToCustom lua (Code attr str) =
- callfunc lua "Code" str (attrToMap attr)
-
-inlineToCustom lua (Math DisplayMath str) =
- callfunc lua "DisplayMath" str
-
-inlineToCustom lua (Math InlineMath str) =
- callfunc lua "InlineMath" str
-
-inlineToCustom lua (RawInline format str) =
- callfunc lua "RawInline" format str
-
-inlineToCustom lua (LineBreak) = callfunc lua "LineBreak"
-
-inlineToCustom lua (Link attr txt (src,tit)) =
- callfunc lua "Link" txt src tit (attrToMap attr)
-
-inlineToCustom lua (Image attr alt (src,tit)) =
- callfunc lua "Image" alt src tit (attrToMap attr)
-
-inlineToCustom lua (Note contents) = callfunc lua "Note" contents
-
-inlineToCustom lua (Span attr items) =
- callfunc lua "Span" items (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
deleted file mode 100644
index 597851f65..000000000
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ /dev/null
@@ -1,440 +0,0 @@
-{-# LANGUAGE OverloadedStrings, PatternGuards #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Docbook
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to Docbook XML.
--}
-module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Shared
-import Text.Pandoc.Walk
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.Math
-import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf )
-import Data.Char ( toLower )
-import Data.Monoid ( Any(..) )
-import Text.Pandoc.Highlighting ( languages, languagesByExtension )
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import qualified Text.Pandoc.Builder as B
-import Text.TeXMath
-import qualified Text.XML.Light as Xml
-import Data.Generics (everywhere, mkT)
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-import Control.Monad.Reader
-
-data DocBookVersion = DocBook4 | DocBook5
- deriving (Eq, Show)
-
-type DB = ReaderT DocBookVersion
-
--- | Convert list of authors to a docbook <author> section
-authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
-authorToDocbook opts name' = do
- name <- render Nothing <$> inlinesToDocbook opts name'
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- return $ B.rawInline "docbook" $ render colwidth $
- if ',' `elem` name
- then -- last name first
- let (lastname, rest) = break (==',') name
- firstname = triml rest in
- inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
- else -- last name last
- let namewords = words name
- lengthname = length namewords
- (firstname, lastname) = case lengthname of
- 0 -> ("","")
- 1 -> ("", name)
- n -> (intercalate " " (take (n-1) namewords), last namewords)
- in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
- inTagsSimple "surname" (text $ escapeStringForXML lastname)
-
-writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeDocbook4 opts d =
- runReaderT (writeDocbook opts d) DocBook4
-
-writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeDocbook5 opts d =
- runReaderT (writeDocbook opts d) DocBook5
-
--- | Convert Pandoc document to string in Docbook format.
-writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String
-writeDocbook opts (Pandoc meta blocks) = do
- let elements = hierarchicalize blocks
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- let render' = render colwidth
- let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr)
- (writerTemplate opts) &&
- TopLevelDefault == writerTopLevelDivision opts)
- then opts{ writerTopLevelDivision = TopLevelChapter }
- else opts
- -- The numbering here follows LaTeX's internal numbering
- let startLvl = case writerTopLevelDivision opts' of
- TopLevelPart -> -1
- TopLevelChapter -> 0
- TopLevelSection -> 1
- TopLevelDefault -> 1
- auths' <- mapM (authorToDocbook opts) $ docAuthors meta
- let meta' = B.setMeta "author" auths' meta
- metadata <- metaToJSON opts
- (fmap (render colwidth . vcat) .
- (mapM (elementToDocbook opts' startLvl) .
- hierarchicalize))
- (fmap (render colwidth) . inlinesToDocbook opts')
- meta'
- main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements)
- let context = defField "body" main
- $ defField "mathml" (case writerHTMLMathMethod opts of
- MathML -> True
- _ -> False)
- $ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
--- | Convert an Element to Docbook.
-elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
-elementToDocbook opts _ (Blk block) = blockToDocbook opts block
-elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do
- version <- ask
- -- Docbook doesn't allow sections with no content, so insert some if needed
- let elements' = if null elements
- then [Blk (Para [])]
- else elements
- tag = case lvl of
- -1 -> "part"
- 0 -> "chapter"
- n | n >= 1 && n <= 5 -> if version == DocBook5
- then "section"
- else "sect" ++ show n
- _ -> "simplesect"
- idName = if version == DocBook5
- then "xml:id"
- else "id"
- idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')]
- nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
- else []
- attribs = nsAttr ++ idAttr
- contents <- mapM (elementToDocbook opts (lvl + 1)) elements'
- title' <- inlinesToDocbook opts title
- return $ inTags True tag attribs $
- inTagsSimple "title" title' $$ vcat contents
-
--- | Convert a list of Pandoc blocks to Docbook.
-blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
-blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts)
-
--- | Auxiliary function to convert Plain block to Para.
-plainToPara :: Block -> Block
-plainToPara (Plain x) = Para x
-plainToPara x = x
-
--- | Convert a list of pairs of terms and definitions into a list of
--- Docbook varlistentrys.
-deflistItemsToDocbook :: PandocMonad m
- => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc
-deflistItemsToDocbook opts items =
- vcat <$> mapM (\(term, defs) -> deflistItemToDocbook opts term defs) items
-
--- | Convert a term and a list of blocks into a Docbook varlistentry.
-deflistItemToDocbook :: PandocMonad m
- => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc
-deflistItemToDocbook opts term defs = do
- term' <- inlinesToDocbook opts term
- def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs
- return $ inTagsIndented "varlistentry" $
- inTagsIndented "term" term' $$
- inTagsIndented "listitem" def'
-
--- | Convert a list of lists of blocks to a list of Docbook list items.
-listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc
-listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items
-
--- | Convert a list of blocks into a Docbook list item.
-listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
-listItemToDocbook opts item =
- inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item)
-
-imageToDocbook :: WriterOptions -> Attr -> String -> Doc
-imageToDocbook _ attr src = selfClosingTag "imagedata" $
- ("fileref", src) : idAndRole attr ++ dims
- where
- dims = go Width "width" ++ go Height "depth"
- go dir dstr = case (dimension dir attr) of
- Just a -> [(dstr, show a)]
- Nothing -> []
-
--- | Convert a Pandoc block element to Docbook.
-blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc
-blockToDocbook _ Null = return empty
--- Add ids to paragraphs in divs with ids - this is needed for
--- pandoc-citeproc to get link anchors in bibliographies:
-blockToDocbook opts (Div (ident,_,_) [Para lst]) =
- let attribs = [("id", ident) | not (null ident)] in
- if hasLineBreaks lst
- then (flush . nowrap . inTags False "literallayout" attribs)
- <$> inlinesToDocbook opts lst
- else inTags True "para" attribs <$> inlinesToDocbook opts lst
-blockToDocbook opts (Div (ident,_,_) bs) = do
- contents <- blocksToDocbook opts (map plainToPara bs)
- return $
- (if null ident
- then mempty
- else selfClosingTag "anchor" [("id", ident)]) $$ contents
-blockToDocbook _ (Header _ _ _) =
- return empty -- should not occur after hierarchicalize
-blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
--- title beginning with fig: indicates that the image is a figure
-blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
- alt <- inlinesToDocbook opts txt
- let capt = if null txt
- then empty
- else inTagsSimple "title" alt
- return $ inTagsIndented "figure" $
- capt $$
- (inTagsIndented "mediaobject" $
- (inTagsIndented "imageobject"
- (imageToDocbook opts attr src)) $$
- inTagsSimple "textobject" (inTagsSimple "phrase" alt))
-blockToDocbook opts (Para lst)
- | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout")
- <$> inlinesToDocbook opts lst
- | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst
-blockToDocbook opts (LineBlock lns) =
- blockToDocbook opts $ linesToPara lns
-blockToDocbook opts (BlockQuote blocks) =
- inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
- text ("<programlisting" ++ lang ++ ">") <> cr <>
- flush (text (escapeStringForXML str) <> cr <> text "</programlisting>")
- where lang = if null langs
- then ""
- else " language=\"" ++ escapeStringForXML (head langs) ++
- "\""
- isLang l = map toLower l `elem` map (map toLower) languages
- langsFrom s = if isLang s
- then [s]
- else languagesByExtension . map toLower $ s
- langs = concatMap langsFrom classes
-blockToDocbook opts (BulletList lst) = do
- let attribs = [("spacing", "compact") | isTightList lst]
- inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst
-blockToDocbook _ (OrderedList _ []) = return empty
-blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do
- let numeration = case numstyle of
- DefaultStyle -> []
- Decimal -> [("numeration", "arabic")]
- Example -> [("numeration", "arabic")]
- UpperAlpha -> [("numeration", "upperalpha")]
- LowerAlpha -> [("numeration", "loweralpha")]
- UpperRoman -> [("numeration", "upperroman")]
- LowerRoman -> [("numeration", "lowerroman")]
- spacing = [("spacing", "compact") | isTightList (first:rest)]
- attribs = numeration ++ spacing
- items <- if start == 1
- then listItemsToDocbook opts (first:rest)
- else do
- first' <- blocksToDocbook opts (map plainToPara first)
- rest' <- listItemsToDocbook opts rest
- return $
- (inTags True "listitem" [("override",show start)] first') $$
- rest'
- return $ inTags True "orderedlist" attribs items
-blockToDocbook opts (DefinitionList lst) = do
- let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst]
- inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst
-blockToDocbook _ b@(RawBlock f str)
- | f == "docbook" = return $ text str -- raw XML block
- | f == "html" = do
- version <- ask
- if version == DocBook5
- then return empty -- No html in Docbook5
- else return $ text str -- allow html for backwards compatibility
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToDocbook _ HorizontalRule = return empty -- not semantic
-blockToDocbook opts (Table caption aligns widths headers rows) = do
- captionDoc <- if null caption
- then return empty
- else inTagsIndented "title" <$>
- inlinesToDocbook opts caption
- let tableType = if isEmpty captionDoc then "informaltable" else "table"
- percent w = show (truncate (100*w) :: Integer) ++ "*"
- coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec"
- ([("colwidth", percent w) | w > 0] ++
- [("align", alignmentToString al)])) widths aligns
- head' <- if all null headers
- then return empty
- else inTagsIndented "thead" <$> tableRowToDocbook opts headers
- body' <- (inTagsIndented "tbody" . vcat) <$>
- mapM (tableRowToDocbook opts) rows
- return $ inTagsIndented tableType $ captionDoc $$
- (inTags True "tgroup" [("cols", show (length headers))] $
- coltags $$ head' $$ body')
-
-hasLineBreaks :: [Inline] -> Bool
-hasLineBreaks = getAny . query isLineBreak . walk removeNote
- where
- removeNote :: Inline -> Inline
- removeNote (Note _) = Str ""
- removeNote x = x
- isLineBreak :: Inline -> Any
- isLineBreak LineBreak = Any True
- isLineBreak _ = Any False
-
-alignmentToString :: Alignment -> [Char]
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
-tableRowToDocbook :: PandocMonad m
- => WriterOptions
- -> [[Block]]
- -> DB m Doc
-tableRowToDocbook opts cols =
- (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols
-
-tableItemToDocbook :: PandocMonad m
- => WriterOptions
- -> [Block]
- -> DB m Doc
-tableItemToDocbook opts item =
- (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item
-
--- | Convert a list of inline elements to Docbook.
-inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc
-inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst
-
--- | Convert an inline element to Docbook.
-inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc
-inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str
-inlineToDocbook opts (Emph lst) =
- inTagsSimple "emphasis" <$> inlinesToDocbook opts lst
-inlineToDocbook opts (Strong lst) =
- inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst
-inlineToDocbook opts (Strikeout lst) =
- inTags False "emphasis" [("role", "strikethrough")] <$>
- inlinesToDocbook opts lst
-inlineToDocbook opts (Superscript lst) =
- inTagsSimple "superscript" <$> inlinesToDocbook opts lst
-inlineToDocbook opts (Subscript lst) =
- inTagsSimple "subscript" <$> inlinesToDocbook opts lst
-inlineToDocbook opts (SmallCaps lst) =
- inTags False "emphasis" [("role", "smallcaps")] <$>
- inlinesToDocbook opts lst
-inlineToDocbook opts (Quoted _ lst) =
- inTagsSimple "quote" <$> inlinesToDocbook opts lst
-inlineToDocbook opts (Cite _ lst) =
- inlinesToDocbook opts lst
-inlineToDocbook opts (Span (ident,_,_) ils) =
- ((if null ident
- then mempty
- else selfClosingTag "anchor" [("id", ident)]) <>) <$>
- inlinesToDocbook opts ils
-inlineToDocbook _ (Code _ str) =
- return $ inTagsSimple "literal" $ text (escapeStringForXML str)
-inlineToDocbook opts (Math t str)
- | isMathML (writerHTMLMathMethod opts) = do
- res <- convertMath writeMathML t str
- case res of
- Right r -> return $ inTagsSimple tagtype
- $ text $ Xml.ppcElement conf
- $ fixNS
- $ removeAttr r
- Left il -> inlineToDocbook opts il
- | otherwise =
- texMathToInlines t str >>= inlinesToDocbook opts
- where tagtype = case t of
- InlineMath -> "inlineequation"
- DisplayMath -> "informalequation"
- conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
- removeAttr e = e{ Xml.elAttribs = [] }
- fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
- fixNS = everywhere (mkT fixNS')
-inlineToDocbook _ il@(RawInline f x)
- | f == "html" || f == "docbook" = return $ text x
- | otherwise = do
- report $ InlineNotRendered il
- return empty
-inlineToDocbook _ LineBreak = return $ text "\n"
--- currently ignore, would require the option to add custom
--- styles to the document
-inlineToDocbook _ Space = return space
--- because we use \n for LineBreak, we can't do soft breaks:
-inlineToDocbook _ SoftBreak = return space
-inlineToDocbook opts (Link attr txt (src, _))
- | Just email <- stripPrefix "mailto:" src =
- let emailLink = inTagsSimple "email" $ text $
- escapeStringForXML $ email
- in case txt of
- [Str s] | escapeURI s == email -> return emailLink
- _ -> do contents <- inlinesToDocbook opts txt
- return $ contents <+>
- char '(' <> emailLink <> char ')'
- | otherwise = do
- version <- ask
- (if isPrefixOf "#" src
- then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr
- else if version == DocBook5
- then inTags False "link" $ ("xlink:href", src) : idAndRole attr
- else inTags False "ulink" $ ("url", src) : idAndRole attr )
- <$> inlinesToDocbook opts txt
-inlineToDocbook opts (Image attr _ (src, tit)) = return $
- let titleDoc = if null tit
- then empty
- else inTagsIndented "objectinfo" $
- inTagsIndented "title" (text $ escapeStringForXML tit)
- in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
- titleDoc $$ imageToDocbook opts attr src
-inlineToDocbook opts (Note contents) =
- inTagsIndented "footnote" <$> blocksToDocbook opts contents
-
-isMathML :: HTMLMathMethod -> Bool
-isMathML MathML = True
-isMathML _ = False
-
-idAndRole :: Attr -> [(String, String)]
-idAndRole (id',cls,_) = ident ++ role
- where
- ident = if null id'
- then []
- else [("id", id')]
- role = if null cls
- then []
- else [("role", unwords cls)]
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
deleted file mode 100644
index 56aa29211..000000000
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ /dev/null
@@ -1,1302 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns, DeriveFunctor #-}
-{-
-Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Docx
- Copyright : Copyright (C) 2012-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to docx.
--}
-module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import qualified Text.Pandoc.UTF8 as UTF8
-import Codec.Archive.Zip
-import Data.Time.Clock.POSIX
-import Text.Pandoc.Compat.Time
-import Text.Pandoc.Definition
-import Text.Pandoc.Generic
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Shared hiding (Element)
-import Text.Pandoc.Writers.Shared (fixDisplayMath)
-import Text.Pandoc.Options
-import Text.Pandoc.Writers.Math
-import Text.Pandoc.Highlighting ( highlight )
-import Text.Pandoc.Walk
-import Text.Pandoc.Error (PandocError)
-import Text.XML.Light as XML
-import Text.TeXMath
-import Text.Pandoc.Readers.Docx.StyleMap
-import Control.Monad.Reader
-import Control.Monad.State
-import Skylighting
-import Control.Monad.Except (runExceptT)
-import System.Random (randomR)
-import Text.Printf (printf)
-import Data.Monoid ((<>))
-import qualified Data.Text as T
-import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
- extensionFromMimeType)
-import Control.Applicative ((<|>))
-import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing)
-import Data.Char (ord, isSpace, toLower)
-import Text.Pandoc.Class (PandocMonad, report)
-import qualified Text.Pandoc.Class as P
-import Text.Pandoc.Logging
-
-data ListMarker = NoMarker
- | BulletMarker
- | NumberMarker ListNumberStyle ListNumberDelim Int
- deriving (Show, Read, Eq, Ord)
-
-listMarkerToId :: ListMarker -> String
-listMarkerToId NoMarker = "990"
-listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) =
- '9' : '9' : styNum : delimNum : show n
- where styNum = case sty of
- DefaultStyle -> '2'
- Example -> '3'
- Decimal -> '4'
- LowerRoman -> '5'
- UpperRoman -> '6'
- LowerAlpha -> '7'
- UpperAlpha -> '8'
- delimNum = case delim of
- DefaultDelim -> '0'
- Period -> '1'
- OneParen -> '2'
- TwoParens -> '3'
-
-data WriterEnv = WriterEnv{ envTextProperties :: [Element]
- , envParaProperties :: [Element]
- , envRTL :: Bool
- , envListLevel :: Int
- , envListNumId :: Int
- , envInDel :: Bool
- , envChangesAuthor :: String
- , envChangesDate :: String
- , envPrintWidth :: Integer
- }
-
-defaultWriterEnv :: WriterEnv
-defaultWriterEnv = WriterEnv{ envTextProperties = []
- , envParaProperties = []
- , envRTL = False
- , envListLevel = -1
- , envListNumId = 1
- , envInDel = False
- , envChangesAuthor = "unknown"
- , envChangesDate = "1969-12-31T19:00:00Z"
- , envPrintWidth = 1
- }
-
-data WriterState = WriterState{
- stFootnotes :: [Element]
- , stSectionIds :: Set.Set String
- , stExternalLinks :: M.Map String String
- , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
- , stLists :: [ListMarker]
- , stInsId :: Int
- , stDelId :: Int
- , stStyleMaps :: StyleMaps
- , stFirstPara :: Bool
- , stTocTitle :: [Inline]
- , stDynamicParaProps :: [String]
- , stDynamicTextProps :: [String]
- }
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState{
- stFootnotes = defaultFootnotes
- , stSectionIds = Set.empty
- , stExternalLinks = M.empty
- , stImages = M.empty
- , stLists = [NoMarker]
- , stInsId = 1
- , stDelId = 1
- , stStyleMaps = defaultStyleMaps
- , stFirstPara = False
- , stTocTitle = [Str "Table of Contents"]
- , stDynamicParaProps = []
- , stDynamicTextProps = []
- }
-
-type WS m = ReaderT WriterEnv (StateT WriterState m)
-
-mknode :: Node t => String -> [(String,String)] -> t -> Element
-mknode s attrs =
- add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s)
-
-nodename :: String -> QName
-nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
- (xs,[]) -> (xs, Nothing)
- (ys, _:zs) -> (zs, Just ys)
-
-toLazy :: B.ByteString -> BL.ByteString
-toLazy = BL.fromChunks . (:[])
-
-renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
- UTF8.fromStringLazy (showElement elt)
-
-renumIdMap :: Int -> [Element] -> M.Map String String
-renumIdMap _ [] = M.empty
-renumIdMap n (e:es)
- | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
- M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es)
- | otherwise = renumIdMap n es
-
-replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
-replaceAttr _ _ [] = []
-replaceAttr f val (a:as) | f (attrKey a) =
- (XML.Attr (attrKey a) val) : (replaceAttr f val as)
- | otherwise = a : (replaceAttr f val as)
-
-renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element
-renumId f renumMap e
- | Just oldId <- findAttrBy f e
- , Just newId <- M.lookup oldId renumMap =
- let attrs' = replaceAttr f newId (elAttribs e)
- in
- e { elAttribs = attrs' }
- | otherwise = e
-
-renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
-renumIds f renumMap = map (renumId f renumMap)
-
--- | Certain characters are invalid in XML even if escaped.
--- See #1992
-stripInvalidChars :: String -> String
-stripInvalidChars = filter isValidChar
-
--- | See XML reference
-isValidChar :: Char -> Bool
-isValidChar (ord -> c)
- | c == 0x9 = True
- | c == 0xA = True
- | c == 0xD = True
- | 0x20 <= c && c <= 0xD7FF = True
- | 0xE000 <= c && c <= 0xFFFD = True
- | 0x10000 <= c && c <= 0x10FFFF = True
- | otherwise = False
-
-metaValueToInlines :: MetaValue -> [Inline]
-metaValueToInlines (MetaString s) = [Str s]
-metaValueToInlines (MetaInlines ils) = ils
-metaValueToInlines (MetaBlocks bs) = query return bs
-metaValueToInlines (MetaBool b) = [Str $ show b]
-metaValueToInlines _ = []
-
-
-
-writeDocx :: (PandocMonad m)
- => WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> m BL.ByteString
-writeDocx opts doc@(Pandoc meta _) = do
- let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath $ doc
- username <- P.lookupEnv "USERNAME"
- utctime <- P.getCurrentTime
- distArchive <- (toArchive . BL.fromStrict) <$>
- P.readDataFile datadir "reference.docx"
- refArchive <- case writerReferenceDoc opts of
- Just f -> toArchive <$> P.readFileLazy f
- Nothing -> return distArchive
-
- parsedDoc <- parseXml refArchive distArchive "word/document.xml"
- let wname f qn = qPrefix qn == Just "w" && f (qName qn)
- let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
-
- -- Gets the template size
- let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz")))
- let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName))
-
- let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar")))
- let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName))
- let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName))
-
- -- Get the avaible area (converting the size and the margins to int and
- -- doing the difference
- let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer)
- <*> (
- (+) <$> (read <$> mbAttrMarRight ::Maybe Integer)
- <*> (read <$> mbAttrMarLeft ::Maybe Integer)
- )
-
- -- styles
- let stylepath = "word/styles.xml"
- styledoc <- parseXml refArchive distArchive stylepath
-
- -- parse styledoc for heading styles
- let styleMaps = getStyleMaps styledoc
-
- let tocTitle = fromMaybe (stTocTitle defaultWriterState) $
- metaValueToInlines <$> lookupMeta "toc-title" meta
-
- let initialSt = defaultWriterState {
- stStyleMaps = styleMaps
- , stTocTitle = tocTitle
- }
-
- let isRTLmeta = case lookupMeta "dir" meta of
- Just (MetaString "rtl") -> True
- Just (MetaInlines [Str "rtl"]) -> True
- _ -> False
-
- let env = defaultWriterEnv {
- envRTL = isRTLmeta
- , envChangesAuthor = fromMaybe "unknown" username
- , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
- , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
- }
-
-
- ((contents, footnotes), st) <- runStateT
- (runReaderT
- (writeOpenXML opts{writerWrapText = WrapNone} doc')
- env)
- initialSt
- let epochtime = floor $ utcTimeToPOSIXSeconds utctime
- let imgs = M.elems $ stImages st
-
- -- create entries for images in word/media/...
- let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
- let imageEntries = map toImageEntry imgs
-
- let stdAttributes =
- [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
- ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
- ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
- ,("xmlns:o","urn:schemas-microsoft-com:office:office")
- ,("xmlns:v","urn:schemas-microsoft-com:vml")
- ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
- ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
- ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
- ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
-
-
- parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels"
- let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
- let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
- let headers = filterElements isHeaderNode parsedRels
- let footers = filterElements isFooterNode parsedRels
-
- let extractTarget = findAttr (QName "Target" Nothing Nothing)
-
- -- we create [Content_Types].xml and word/_rels/document.xml.rels
- -- from scratch rather than reading from reference.docx,
- -- because Word sometimes changes these files when a reference.docx is modified,
- -- e.g. deleting the reference to footnotes.xml or removing default entries
- -- for image content types.
-
- -- [Content_Types].xml
- let mkOverrideNode (part', contentType') = mknode "Override"
- [("PartName",part'),("ContentType",contentType')] ()
- let mkImageOverride (_, imgpath, mbMimeType, _, _) =
- mkOverrideNode ("/word/" ++ imgpath,
- fromMaybe "application/octet-stream" mbMimeType)
- let mkMediaOverride imgpath =
- mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath)
- let overrides = map mkOverrideNode (
- [("/word/webSettings.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
- ,("/word/numbering.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml")
- ,("/word/settings.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml")
- ,("/word/theme/theme1.xml",
- "application/vnd.openxmlformats-officedocument.theme+xml")
- ,("/word/fontTable.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml")
- ,("/docProps/app.xml",
- "application/vnd.openxmlformats-officedocument.extended-properties+xml")
- ,("/docProps/core.xml",
- "application/vnd.openxmlformats-package.core-properties+xml")
- ,("/word/styles.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml")
- ,("/word/document.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
- ,("/word/footnotes.xml",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
- ] ++
- map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
- "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
- map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
- "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
- map mkImageOverride imgs ++
- map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive
- , "word/media/" `isPrefixOf` eRelativePath e ]
-
- let defaultnodes = [mknode "Default"
- [("Extension","xml"),("ContentType","application/xml")] (),
- mknode "Default"
- [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
- let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
- let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
- $ renderXml contentTypesDoc
-
- -- word/_rels/document.xml.rels
- let toBaseRel (url', id', target') = mknode "Relationship"
- [("Type",url')
- ,("Id",id')
- ,("Target",target')] ()
- let baserels' = map toBaseRel
- [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering",
- "rId1",
- "numbering.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles",
- "rId2",
- "styles.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings",
- "rId3",
- "settings.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings",
- "rId4",
- "webSettings.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
- "rId5",
- "fontTable.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme",
- "rId6",
- "theme/theme1.xml")
- ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
- "rId7",
- "footnotes.xml")
- ]
-
- let idMap = renumIdMap (length baserels' + 1) (headers ++ footers)
- let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
- let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
- let baserels = baserels' ++ renumHeaders ++ renumFooters
- let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
- let imgrels = map toImgRel imgs
- let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
- let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
- let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels
- let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
- $ renderXml reldoc
-
-
- -- adjust contents to add sectPr from reference.docx
- let sectpr = case mbsectpr of
- Just sectpr' -> let cs = renumIds
- (\q -> qName q == "id" && qPrefix q == Just "r")
- idMap
- (elChildren sectpr')
- in
- add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs
- Nothing -> (mknode "w:sectPr" [] ())
-
- -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
- let contents' = contents ++ [sectpr]
- let docContents = mknode "w:document" stdAttributes
- $ mknode "w:body" [] contents'
-
-
-
- -- word/document.xml
- let contentEntry = toEntry "word/document.xml" epochtime
- $ renderXml docContents
-
- -- footnotes
- let notes = mknode "w:footnotes" stdAttributes footnotes
- let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes
-
- -- footnote rels
- let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
- $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
- linkrels
-
- -- styles
-
- -- We only want to inject paragraph and text properties that
- -- are not already in the style map. Note that keys in the stylemap
- -- are normalized as lowercase.
- let newDynamicParaProps = filter
- (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps)
- (stDynamicParaProps st)
-
- newDynamicTextProps = filter
- (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps)
- (stDynamicTextProps st)
-
- let newstyles = map newParaPropToOpenXml newDynamicParaProps ++
- map newTextPropToOpenXml newDynamicTextProps ++
- (case writerHighlightStyle opts of
- Nothing -> []
- Just sty -> (styleToOpenXml styleMaps sty))
- let styledoc' = styledoc{ elContent = elContent styledoc ++
- map Elem newstyles }
- let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-
- -- construct word/numbering.xml
- let numpath = "word/numbering.xml"
- numbering <- parseXml refArchive distArchive numpath
- newNumElts <- mkNumbering (stLists st)
- let allElts = onlyElems (elContent numbering) ++ newNumElts
- let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent =
- -- we want all the abstractNums first, then the nums,
- -- otherwise things break:
- [Elem e | e <- allElts
- , qName (elName e) == "abstractNum" ] ++
- [Elem e | e <- allElts
- , qName (elName e) == "num" ] }
- let docPropsPath = "docProps/core.xml"
- let docProps = mknode "cp:coreProperties"
- [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
- ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
- ,("xmlns:dcterms","http://purl.org/dc/terms/")
- ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
- ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
- $ mknode "dc:title" [] (stringify $ docTitle meta)
- : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
- : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
- , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
- ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
- let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
-
- let relsPath = "_rels/.rels"
- let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
- $ map (\attrs -> mknode "Relationship" attrs ())
- [ [("Id","rId1")
- ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
- ,("Target","word/document.xml")]
- , [("Id","rId4")
- ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties")
- ,("Target","docProps/app.xml")]
- , [("Id","rId3")
- ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties")
- ,("Target","docProps/core.xml")]
- ]
- let relsEntry = toEntry relsPath epochtime $ renderXml rels
-
- -- we use dist archive for settings.xml, because Word sometimes
- -- adds references to footnotes or endnotes we don't have...
- -- we do, however, copy some settings over from reference
- let settingsPath = "word/settings.xml"
- settingsList = [ "w:autoHyphenation"
- , "w:consecutiveHyphenLimit"
- , "w:hyphenationZone"
- , "w:doNotHyphenateCap"
- ]
- settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
-
- let entryFromArchive arch path =
- maybe (fail $ path ++ " missing in reference docx")
- return
- (findEntryByPath path arch `mplus` findEntryByPath path distArchive)
- docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
- themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
- fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
- webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
- headerFooterEntries <- mapM (entryFromArchive refArchive) $
- mapMaybe (fmap ("word/" ++) . extractTarget)
- (headers ++ footers)
- let miscRelEntries = [ e | e <- zEntries refArchive
- , "word/_rels/" `isPrefixOf` (eRelativePath e)
- , ".xml.rels" `isSuffixOf` (eRelativePath e)
- , eRelativePath e /= "word/_rels/document.xml.rels"
- , eRelativePath e /= "word/_rels/footnotes.xml.rels" ]
- let otherMediaEntries = [ e | e <- zEntries refArchive
- , "word/media/" `isPrefixOf` eRelativePath e ]
-
- -- Create archive
- let archive = foldr addEntryToArchive emptyArchive $
- contentTypesEntry : relsEntry : contentEntry : relEntry :
- footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
- docPropsEntry : docPropsAppEntry : themeEntry :
- fontTableEntry : settingsEntry : webSettingsEntry :
- imageEntries ++ headerFooterEntries ++
- miscRelEntries ++ otherMediaEntries
- return $ fromArchive archive
-
-
-newParaPropToOpenXml :: String -> Element
-newParaPropToOpenXml s =
- let styleId = filter (not . isSpace) s
- in mknode "w:style" [ ("w:type", "paragraph")
- , ("w:customStyle", "1")
- , ("w:styleId", styleId)]
- [ mknode "w:name" [("w:val", s)] ()
- , mknode "w:basedOn" [("w:val","BodyText")] ()
- , mknode "w:qFormat" [] ()
- ]
-
-newTextPropToOpenXml :: String -> Element
-newTextPropToOpenXml s =
- let styleId = filter (not . isSpace) s
- in mknode "w:style" [ ("w:type", "character")
- , ("w:customStyle", "1")
- , ("w:styleId", styleId)]
- [ mknode "w:name" [("w:val", s)] ()
- , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
- ]
-
-styleToOpenXml :: StyleMaps -> Style -> [Element]
-styleToOpenXml sm style =
- maybeToList parStyle ++ mapMaybe toStyle alltoktypes
- where alltoktypes = enumFromTo KeywordTok NormalTok
- toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
- | otherwise = Just $
- mknode "w:style" [("w:type","character"),
- ("w:customStyle","1"),("w:styleId",show toktype)]
- [ mknode "w:name" [("w:val",show toktype)] ()
- , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
- , mknode "w:rPr" [] $
- [ mknode "w:color" [("w:val",tokCol toktype)] ()
- | tokCol toktype /= "auto" ] ++
- [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
- | tokBg toktype /= "auto" ] ++
- [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
- [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
- [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
- ]
- tokStyles = tokenStyles style
- tokFeature f toktype = maybe False f $ lookup toktype tokStyles
- tokCol toktype = maybe "auto" (drop 1 . fromColor)
- $ (tokenColor =<< lookup toktype tokStyles)
- `mplus` defaultColor style
- tokBg toktype = maybe "auto" (drop 1 . fromColor)
- $ (tokenBackground =<< lookup toktype tokStyles)
- `mplus` backgroundColor style
- parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
- | otherwise = Just $
- mknode "w:style" [("w:type","paragraph"),
- ("w:customStyle","1"),("w:styleId","SourceCode")]
- [ mknode "w:name" [("w:val","Source Code")] ()
- , mknode "w:basedOn" [("w:val","Normal")] ()
- , mknode "w:link" [("w:val","VerbatimChar")] ()
- , mknode "w:pPr" []
- $ mknode "w:wordWrap" [("w:val","off")] ()
- : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
- $ backgroundColor style )
- ]
-
-copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
-copyChildren refArchive distArchive path timestamp elNames = do
- ref <- parseXml refArchive distArchive path
- dist <- parseXml distArchive distArchive path
- return $ toEntry path timestamp $ renderXml dist{
- elContent = elContent dist ++ copyContent ref
- }
- where
- strName QName{qName=name, qPrefix=prefix}
- | Just p <- prefix = p++":"++name
- | otherwise = name
- shouldCopy = (`elem` elNames) . strName
- cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
- copyContent = map cleanElem . filterChildrenName shouldCopy
-
--- this is the lowest number used for a list numId
-baseListId :: Int
-baseListId = 1000
-
-mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element]
-mkNumbering lists = do
- elts <- mapM mkAbstractNum (ordNub lists)
- return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
-
-mkNum :: ListMarker -> Int -> Element
-mkNum marker numid =
- mknode "w:num" [("w:numId",show numid)]
- $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
- : case marker of
- NoMarker -> []
- BulletMarker -> []
- NumberMarker _ _ start ->
- map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
- $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
-
-mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element
-mkAbstractNum marker = do
- gen <- P.newStdGen
- let (nsid, _) = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen
- return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
- $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
- : mknode "w:multiLevelType" [("w:val","multilevel")] ()
- : map (mkLvl marker) [0..6]
-
-mkLvl :: ListMarker -> Int -> Element
-mkLvl marker lvl =
- mknode "w:lvl" [("w:ilvl",show lvl)] $
- [ mknode "w:start" [("w:val",start)] ()
- | marker /= NoMarker && marker /= BulletMarker ] ++
- [ mknode "w:numFmt" [("w:val",fmt)] ()
- , mknode "w:lvlText" [("w:val",lvltxt)] ()
- , mknode "w:lvlJc" [("w:val","left")] ()
- , mknode "w:pPr" []
- [ mknode "w:tabs" []
- $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
- , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
- ]
- ]
- where (fmt, lvltxt, start) =
- case marker of
- NoMarker -> ("bullet"," ","1")
- BulletMarker -> ("bullet",bulletFor lvl,"1")
- NumberMarker st de n -> (styleFor st lvl
- ,patternFor de ("%" ++ show (lvl + 1))
- ,show n)
- step = 720
- hang = 480
- bulletFor 0 = "\x2022" -- filled circle
- bulletFor 1 = "\x2013" -- en dash
- bulletFor 2 = "\x2022" -- hyphen bullet
- bulletFor 3 = "\x2013"
- bulletFor 4 = "\x2022"
- bulletFor 5 = "\x2013"
- bulletFor _ = "\x2022"
- styleFor UpperAlpha _ = "upperLetter"
- styleFor LowerAlpha _ = "lowerLetter"
- styleFor UpperRoman _ = "upperRoman"
- styleFor LowerRoman _ = "lowerRoman"
- styleFor Decimal _ = "decimal"
- styleFor DefaultStyle 1 = "decimal"
- styleFor DefaultStyle 2 = "lowerLetter"
- styleFor DefaultStyle 3 = "lowerRoman"
- styleFor DefaultStyle 4 = "decimal"
- styleFor DefaultStyle 5 = "lowerLetter"
- styleFor DefaultStyle 6 = "lowerRoman"
- styleFor _ _ = "decimal"
- patternFor OneParen s = s ++ ")"
- patternFor TwoParens s = "(" ++ s ++ ")"
- patternFor _ s = s ++ "."
-
-getNumId :: (PandocMonad m) => WS m Int
-getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
-
-
-makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
-makeTOC opts | writerTableOfContents opts = do
- let depth = "1-"++(show (writerTOCDepth opts))
- let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
- tocTitle <- gets stTocTitle
- title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
- return $
- [mknode "w:sdt" [] ([
- mknode "w:sdtPr" [] (
- mknode "w:docPartObj" [] (
- [mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
- mknode "w:docPartUnique" [] ()]
- ) -- w:docPartObj
- ), -- w:sdtPr
- mknode "w:sdtContent" [] (title++[
- mknode "w:p" [] (
- mknode "w:r" [] ([
- mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
- mknode "w:instrText" [("xml:space","preserve")] tocCmd,
- mknode "w:fldChar" [("w:fldCharType","separate")] (),
- mknode "w:fldChar" [("w:fldCharType","end")] ()
- ]) -- w:r
- ) -- w:p
- ])
- ])] -- w:sdt
-makeTOC _ = return []
-
-
--- | Convert Pandoc document to two lists of
--- OpenXML elements (the main document and footnotes).
-writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element])
-writeOpenXML opts (Pandoc meta blocks) = do
- let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> LineBreak : xs
- _ -> []
- let auths = docAuthors meta
- let dat = docDate meta
- let abstract' = case lookupMeta "abstract" meta of
- Just (MetaBlocks bs) -> bs
- Just (MetaInlines ils) -> [Plain ils]
- _ -> []
- let subtitle' = case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> xs
- Just (MetaBlocks [Para xs]) -> xs
- Just (MetaInlines xs) -> xs
- _ -> []
- title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
- authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
- map Para auths
- date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
- abstract <- if null abstract'
- then return []
- else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
- let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
- convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
- convertSpace xs = xs
- let blocks' = bottomUp convertSpace blocks
- doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
- notes' <- reverse `fmap` gets stFootnotes
- toc <- makeTOC opts
- let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
- return (meta' ++ doc', notes')
-
--- | Convert a list of Pandoc blocks to OpenXML.
-blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
-blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
-
-pCustomStyle :: String -> Element
-pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
-
-pStyleM :: (PandocMonad m) => String -> WS m XML.Element
-pStyleM styleName = do
- styleMaps <- gets stStyleMaps
- let sty' = getStyleId styleName $ sParaStyleMap styleMaps
- return $ mknode "w:pStyle" [("w:val",sty')] ()
-
-rCustomStyle :: String -> Element
-rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-
-rStyleM :: (PandocMonad m) => String -> WS m XML.Element
-rStyleM styleName = do
- styleMaps <- gets stStyleMaps
- let sty' = getStyleId styleName $ sCharStyleMap styleMaps
- return $ mknode "w:rStyle" [("w:val",sty')] ()
-
-getUniqueId :: (PandocMonad m) => m String
--- the + 20 is to ensure that there are no clashes with the rIds
--- already in word/document.xml.rel
-getUniqueId = (show . (+ 20)) <$> P.newUniqueHash
-
--- | Key for specifying user-defined docx styles.
-dynamicStyleKey :: String
-dynamicStyleKey = "custom-style"
-
--- | Convert a Pandoc block element to OpenXML.
-blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
-blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
-
-blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
-blockToOpenXML' _ Null = return []
-blockToOpenXML' opts (Div (ident,classes,kvs) bs)
- | Just sty <- lookup dynamicStyleKey kvs = do
- modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)}
- withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs
- | Just "rtl" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "rtl")/=) kvs
- local (\env -> env { envRTL = True }) $
- blockToOpenXML opts (Div (ident,classes,kvs') bs)
- | Just "ltr" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "ltr")/=) kvs
- local (\env -> env { envRTL = False }) $
- blockToOpenXML opts (Div (ident,classes,kvs') bs)
-blockToOpenXML' opts (Div (_,["references"],_) bs) = do
- let (hs, bs') = span isHeaderBlock bs
- header <- blocksToOpenXML opts hs
- -- We put the Bibliography style on paragraphs after the header
- rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
- return (header ++ rest)
-blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs
-blockToOpenXML' opts (Header lev (ident,_,_) lst) = do
- setFirstPara
- paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
- getParaProps False
- contents <- inlinesToOpenXML opts lst
- usedIdents <- gets stSectionIds
- let bookmarkName = if null ident
- then uniqueIdent lst usedIdents
- else ident
- modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s }
- id' <- (lift . lift) getUniqueId
- let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
- ,("w:name",bookmarkName)] ()
- let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
- return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
-blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact")
- $ blockToOpenXML opts (Para lst)
--- title beginning with fig: indicates that the image is a figure
-blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
- setFirstPara
- let prop = pCustomStyle $
- if null alt
- then "Figure"
- else "FigureWithCaption"
- paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False)
- contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
- captionNode <- withParaProp (pCustomStyle "ImageCaption")
- $ blockToOpenXML opts (Para alt)
- return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
--- fixDisplayMath sometimes produces a Para [] as artifact
-blockToOpenXML' _ (Para []) = return []
-blockToOpenXML' opts (Para lst) = do
- isFirstPara <- gets stFirstPara
- paraProps <- getParaProps $ case lst of
- [Math DisplayMath _] -> True
- _ -> False
- bodyTextStyle <- pStyleM "Body Text"
- let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
- [] -> [mknode "w:pPr" [] [bodyTextStyle]]
- ps -> ps
- modify $ \s -> s { stFirstPara = False }
- contents <- inlinesToOpenXML opts lst
- return [mknode "w:p" [] (paraProps' ++ contents)]
-blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
-blockToOpenXML' _ b@(RawBlock format str)
- | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
- | otherwise = do
- report $ BlockNotRendered b
- return []
-blockToOpenXML' opts (BlockQuote blocks) = do
- p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
- setFirstPara
- return p
-blockToOpenXML' opts (CodeBlock attrs str) = do
- p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
- setFirstPara
- return p
-blockToOpenXML' _ HorizontalRule = do
- setFirstPara
- return [
- mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
- $ mknode "v:rect" [("style","width:0;height:1.5pt"),
- ("o:hralign","center"),
- ("o:hrstd","t"),("o:hr","t")] () ]
-blockToOpenXML' opts (Table caption aligns widths headers rows) = do
- setFirstPara
- let captionStr = stringify caption
- caption' <- if null caption
- then return []
- else withParaProp (pCustomStyle "TableCaption")
- $ blockToOpenXML opts (Para caption)
- let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
- let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
- $ blocksToOpenXML opts cell
- headers' <- mapM cellToOpenXML $ zip aligns headers
- rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
- let borderProps = mknode "w:tcPr" []
- [ mknode "w:tcBorders" []
- $ mknode "w:bottom" [("w:val","single")] ()
- , mknode "w:vAlign" [("w:val","bottom")] () ]
- let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [pCustomStyle "Compact"]]]
- let mkcell border contents = mknode "w:tc" []
- $ [ borderProps | border ] ++
- if null contents
- then emptyCell
- else contents
- let mkrow border cells = mknode "w:tr" [] $
- [mknode "w:trPr" [] [
- mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
- ++ map (mkcell border) cells
- let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
- let fullrow = 5000 -- 100% specified in pct
- let rowwidth = fullrow * sum widths
- let mkgridcol w = mknode "w:gridCol"
- [("w:w", show (floor (textwidth * w) :: Integer))] ()
- let hasHeader = not (all null headers)
- return $
- caption' ++
- [mknode "w:tbl" []
- ( mknode "w:tblPr" []
- ( mknode "w:tblStyle" [("w:val","TableNormal")] () :
- mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
- mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
- [ mknode "w:tblCaption" [("w:val", captionStr)] ()
- | not (null caption) ] )
- : mknode "w:tblGrid" []
- (if all (==0) widths
- then []
- else map mkgridcol widths)
- : [ mkrow True headers' | hasHeader ] ++
- map (mkrow False) rows'
- )]
-blockToOpenXML' opts (BulletList lst) = do
- let marker = BulletMarker
- addList marker
- numid <- getNumId
- l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
- setFirstPara
- return l
-blockToOpenXML' opts (OrderedList (start, numstyle, numdelim) lst) = do
- let marker = NumberMarker numstyle numdelim start
- addList marker
- numid <- getNumId
- l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
- setFirstPara
- return l
-blockToOpenXML' opts (DefinitionList items) = do
- l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items
- setFirstPara
- return l
-
-definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
-definitionListItemToOpenXML opts (term,defs) = do
- term' <- withParaProp (pCustomStyle "DefinitionTerm")
- $ blockToOpenXML opts (Para term)
- defs' <- withParaProp (pCustomStyle "Definition")
- $ concat `fmap` mapM (blocksToOpenXML opts) defs
- return $ term' ++ defs'
-
-addList :: (PandocMonad m) => ListMarker -> WS m ()
-addList marker = do
- lists <- gets stLists
- modify $ \st -> st{ stLists = lists ++ [marker] }
-
-listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
-listItemToOpenXML _ _ [] = return []
-listItemToOpenXML opts numid (first:rest) = do
- first' <- withNumId numid $ blockToOpenXML opts first
- -- baseListId is the code for no list marker:
- rest' <- withNumId baseListId $ blocksToOpenXML opts rest
- return $ first' ++ rest'
-
-alignmentToString :: Alignment -> [Char]
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
--- | Convert a list of inline elements to OpenXML.
-inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
-inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
-
-withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
-withNumId numid = local $ \env -> env{ envListNumId = numid }
-
-asList :: (PandocMonad m) => WS m a -> WS m a
-asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
-
-getTextProps :: (PandocMonad m) => WS m [Element]
-getTextProps = do
- props <- asks envTextProperties
- return $ if null props
- then []
- else [mknode "w:rPr" [] props]
-
-withTextProp :: PandocMonad m => Element -> WS m a -> WS m a
-withTextProp d p =
- local (\env -> env {envTextProperties = d : envTextProperties env}) p
-
-withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
-withTextPropM = (. flip withTextProp) . (>>=)
-
-getParaProps :: PandocMonad m => Bool -> WS m [Element]
-getParaProps displayMathPara = do
- props <- asks envParaProperties
- listLevel <- asks envListLevel
- numid <- asks envListNumId
- let listPr = if listLevel >= 0 && not displayMathPara
- then [ mknode "w:numPr" []
- [ mknode "w:numId" [("w:val",show numid)] ()
- , mknode "w:ilvl" [("w:val",show listLevel)] () ]
- ]
- else []
- return $ case props ++ listPr of
- [] -> []
- ps -> [mknode "w:pPr" [] ps]
-
-withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
-withParaProp d p =
- local (\env -> env {envParaProperties = d : envParaProperties env}) p
-
-withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
-withParaPropM = (. flip withParaProp) . (>>=)
-
-formattedString :: PandocMonad m => String -> WS m [Element]
-formattedString str = do
- props <- getTextProps
- inDel <- asks envInDel
- return [ mknode "w:r" [] $
- props ++
- [ mknode (if inDel then "w:delText" else "w:t")
- [("xml:space","preserve")] (stripInvalidChars str) ] ]
-
-setFirstPara :: PandocMonad m => WS m ()
-setFirstPara = modify $ \s -> s { stFirstPara = True }
-
--- | Convert an inline element to OpenXML.
-inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
-inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
-
-inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
-inlineToOpenXML' _ (Str str) = formattedString str
-inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
-inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
-inlineToOpenXML' opts (Span (ident,classes,kvs) ils)
- | Just sty <- lookup dynamicStyleKey kvs = do
- let kvs' = filter ((dynamicStyleKey, sty)/=) kvs
- modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)}
- withTextProp (rCustomStyle sty) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | Just "rtl" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "rtl")/=) kvs
- local (\env -> env { envRTL = True }) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | Just "ltr" <- lookup "dir" kvs = do
- let kvs' = filter (("dir", "ltr")/=) kvs
- local (\env -> env { envRTL = False }) $
- inlineToOpenXML opts (Span (ident,classes,kvs') ils)
- | "insertion" `elem` classes = do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
- insId <- gets stInsId
- modify $ \s -> s{stInsId = (insId + 1)}
- x <- inlinesToOpenXML opts ils
- return [ mknode "w:ins" [("w:id", (show insId)),
- ("w:author", author),
- ("w:date", date)]
- x ]
- | "deletion" `elem` classes = do
- defaultAuthor <- asks envChangesAuthor
- defaultDate <- asks envChangesDate
- let author = fromMaybe defaultAuthor (lookup "author" kvs)
- date = fromMaybe defaultDate (lookup "date" kvs)
- delId <- gets stDelId
- modify $ \s -> s{stDelId = (delId + 1)}
- x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils)
- return [ mknode "w:del" [("w:id", (show delId)),
- ("w:author", author),
- ("w:date", date)]
- x ]
- | otherwise = do
- let off x = withTextProp (mknode x [("w:val","0")] ())
- ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
- (if "csl-no-strong" `elem` classes then off "w:b" else id) .
- (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
- $ inlinesToOpenXML opts ils
-inlineToOpenXML' opts (Strong lst) =
- withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
-inlineToOpenXML' opts (Emph lst) =
- withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
-inlineToOpenXML' opts (Subscript lst) =
- withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
- $ inlinesToOpenXML opts lst
-inlineToOpenXML' opts (Superscript lst) =
- withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
- $ inlinesToOpenXML opts lst
-inlineToOpenXML' opts (SmallCaps lst) =
- withTextProp (mknode "w:smallCaps" [] ())
- $ inlinesToOpenXML opts lst
-inlineToOpenXML' opts (Strikeout lst) =
- withTextProp (mknode "w:strike" [] ())
- $ inlinesToOpenXML opts lst
-inlineToOpenXML' _ LineBreak = return [br]
-inlineToOpenXML' _ il@(RawInline f str)
- | f == Format "openxml" = return [ x | Elem x <- parseXML str ]
- | otherwise = do
- report $ InlineNotRendered il
- return []
-inlineToOpenXML' opts (Quoted quoteType lst) =
- inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
- where (open, close) = case quoteType of
- SingleQuote -> ("\x2018", "\x2019")
- DoubleQuote -> ("\x201C", "\x201D")
-inlineToOpenXML' opts (Math mathType str) = do
- when (mathType == DisplayMath) setFirstPara
- res <- (lift . lift) (convertMath writeOMML mathType str)
- case res of
- Right r -> return [r]
- Left il -> inlineToOpenXML' opts il
-inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
-inlineToOpenXML' opts (Code attrs str) = do
- let unhighlighted = intercalate [br] `fmap`
- (mapM formattedString $ lines str)
- formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
- toHlTok (toktype,tok) = mknode "w:r" []
- [ mknode "w:rPr" []
- [ rCustomStyle (show toktype) ]
- , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
- withTextProp (rCustomStyle "VerbatimChar")
- $ case writerHighlightStyle opts >> highlight formatOpenXML attrs str of
- Just h -> return h
- Nothing -> unhighlighted
-inlineToOpenXML' opts (Note bs) = do
- notes <- gets stFootnotes
- notenum <- (lift . lift) getUniqueId
- footnoteStyle <- rStyleM "Footnote Reference"
- let notemarker = mknode "w:r" []
- [ mknode "w:rPr" [] footnoteStyle
- , mknode "w:footnoteRef" [] () ]
- let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
- let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
- insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs
- insertNoteRef xs = Para [notemarkerXml] : xs
-
- contents <- local (\env -> env{ envListLevel = -1
- , envParaProperties = []
- , envTextProperties = [] })
- (withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
- $ insertNoteRef bs)
- let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
- modify $ \s -> s{ stFootnotes = newnote : notes }
- return [ mknode "w:r" []
- [ mknode "w:rPr" [] footnoteStyle
- , mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
--- internal link:
-inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do
- contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
- return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
--- external link:
-inlineToOpenXML' opts (Link _ txt (src,_)) = do
- contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
- extlinks <- gets stExternalLinks
- id' <- case M.lookup src extlinks of
- Just i -> return i
- Nothing -> do
- i <- ("rId"++) `fmap` ((lift . lift) getUniqueId)
- modify $ \st -> st{ stExternalLinks =
- M.insert src i extlinks }
- return i
- return [ mknode "w:hyperlink" [("r:id",id')] contents ]
-inlineToOpenXML' opts (Image attr alt (src, title)) = do
- -- first, check to see if we've already done this image
- pageWidth <- asks envPrintWidth
- imgs <- gets stImages
- case M.lookup src imgs of
- Just (_,_,_,elt,_) -> return [elt]
- Nothing -> do
- res <- runExceptT $ lift (P.fetchItem (writerSourceURL opts) src)
- case res of
- Left (_ :: PandocError) -> do
- report $ CouldNotFetchResource src ""
- -- emit alt text
- inlinesToOpenXML opts alt
- Right (img, mt) -> do
- ident <- ("rId"++) `fmap` ((lift . lift) getUniqueId)
- let (xpt,ypt) = desiredSizeInPoints opts attr
- (either (const def) id (imageSize img))
- -- 12700 emu = 1 pt
- let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700)
- let cNvPicPr = mknode "pic:cNvPicPr" [] $
- mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
- let nvPicPr = mknode "pic:nvPicPr" []
- [ mknode "pic:cNvPr"
- [("descr",src),("id","0"),("name","Picture")] ()
- , cNvPicPr ]
- let blipFill = mknode "pic:blipFill" []
- [ mknode "a:blip" [("r:embed",ident)] ()
- , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
- let xfrm = mknode "a:xfrm" []
- [ mknode "a:off" [("x","0"),("y","0")] ()
- , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
- let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
- mknode "a:avLst" [] ()
- let ln = mknode "a:ln" [("w","9525")]
- [ mknode "a:noFill" [] ()
- , mknode "a:headEnd" [] ()
- , mknode "a:tailEnd" [] () ]
- let spPr = mknode "pic:spPr" [("bwMode","auto")]
- [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
- let graphic = mknode "a:graphic" [] $
- mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
- [ mknode "pic:pic" []
- [ nvPicPr
- , blipFill
- , spPr ] ]
- let imgElt = mknode "w:r" [] $
- mknode "w:drawing" [] $
- mknode "wp:inline" []
- [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
- , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
- , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] ()
- , graphic ]
- let imgext = case mt >>= extensionFromMimeType of
- Just x -> '.':x
- Nothing -> case imageType img of
- Just Png -> ".png"
- Just Jpeg -> ".jpeg"
- Just Gif -> ".gif"
- Just Pdf -> ".pdf"
- Just Eps -> ".eps"
- Nothing -> ""
- if null imgext
- then -- without an extension there is no rule for content type
- inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
- else do
- let imgpath = "media/" ++ ident ++ imgext
- let mbMimeType = mt <|> getMimeType imgpath
- -- insert mime type to use in constructing [Content_Types].xml
- modify $ \st -> st{ stImages =
- M.insert src (ident, imgpath, mbMimeType, imgElt, img)
- $ stImages st }
- return [imgElt]
-
-br :: Element
-br = breakElement "textWrapping"
-
-breakElement :: String -> Element
-breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ]
-
--- Word will insert these footnotes into the settings.xml file
--- (whether or not they're visible in the document). If they're in the
--- file, but not in the footnotes.xml file, it will produce
--- problems. So we want to make sure we insert them into our document.
-defaultFootnotes :: [Element]
-defaultFootnotes = [ mknode "w:footnote"
- [("w:type", "separator"), ("w:id", "-1")] $
- [ mknode "w:p" [] $
- [mknode "w:r" [] $
- [ mknode "w:separator" [] ()]]]
- , mknode "w:footnote"
- [("w:type", "continuationSeparator"), ("w:id", "0")] $
- [ mknode "w:p" [] $
- [ mknode "w:r" [] $
- [ mknode "w:continuationSeparator" [] ()]]]]
-
-parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element
-parseXml refArchive distArchive relpath =
- case findEntryByPath relpath refArchive `mplus`
- findEntryByPath relpath distArchive of
- Nothing -> fail $ relpath ++ " missing in reference docx"
- Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
- Nothing -> fail $ relpath ++ " corrupt in reference docx"
- Just d -> return d
-
--- | Scales the image to fit the page
--- sizes are passed in emu
-fitToPage :: (Double, Double) -> Integer -> (Integer, Integer)
-fitToPage (x, y) pageWidth
- -- Fixes width to the page width and scales the height
- | x > fromIntegral pageWidth =
- (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y)
- | otherwise = (floor x, floor y)
-
-withDirection :: PandocMonad m => WS m a -> WS m a
-withDirection x = do
- isRTL <- asks envRTL
- paraProps <- asks envParaProperties
- textProps <- asks envTextProperties
- -- We want to clean all bidirection (bidi) and right-to-left (rtl)
- -- properties from the props first. This is because we don't want
- -- them to stack up.
- let paraProps' = filter (\e -> (qName . elName) e /= "bidi") paraProps
- textProps' = filter (\e -> (qName . elName) e /= "rtl") textProps
- if isRTL
- -- if we are going right-to-left, we (re?)add the properties.
- then flip local x $
- \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps'
- , envTextProperties = (mknode "w:rtl" [] ()) : textProps'
- }
- else flip local x $ \env -> env { envParaProperties = paraProps'
- , envTextProperties = textProps'
- }
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
deleted file mode 100644
index 79a371d4d..000000000
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ /dev/null
@@ -1,522 +0,0 @@
-{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.DokuWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : Clare Macrae <clare.macrae@googlemail.com>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to DokuWiki markup.
-
-DokuWiki: <https://www.dokuwiki.org/dokuwiki>
--}
-
-{-
- [ ] Implement nested blockquotes (currently only ever does one level)
- [x] Implement alignment of text in tables
- [ ] Implement comments
- [ ] Work through the Dokuwiki spec, and check I've not missed anything out
- [ ] Remove dud/duplicate code
--}
-
-module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options ( WriterOptions(
- writerTableOfContents
- , writerTemplate
- , writerWrapText), WrapOption(..) )
-import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting
- , camelCaseToHyphenated, trimr, substitute )
-import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Data.List ( intersect, intercalate, isPrefixOf, transpose )
-import Data.Default (Default(..))
-import Network.URI ( isURI )
-import Control.Monad ( zipWithM )
-import Control.Monad.State ( modify, State, get, evalState )
-import Control.Monad.Reader ( ReaderT, runReaderT, ask, local )
-import Text.Pandoc.Class (PandocMonad)
-
-data WriterState = WriterState {
- stNotes :: Bool -- True if there are notes
- }
-
-data WriterEnvironment = WriterEnvironment {
- stIndent :: String -- Indent after the marker at the beginning of list items
- , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
- , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell)
- }
-
-instance Default WriterState where
- def = WriterState { stNotes = False }
-
-instance Default WriterEnvironment where
- def = WriterEnvironment { stIndent = ""
- , stUseTags = False
- , stBackSlashLB = False }
-
-type DokuWiki = ReaderT WriterEnvironment (State WriterState)
-
--- | Convert Pandoc to DokuWiki.
-writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeDokuWiki opts document = return $
- runDokuWiki (pandocToDokuWiki opts document)
-
-runDokuWiki :: DokuWiki a -> a
-runDokuWiki = flip evalState def . flip runReaderT def
-
--- | Return DokuWiki representation of document.
-pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String
-pandocToDokuWiki opts (Pandoc meta blocks) = do
- metadata <- metaToJSON opts
- (fmap trimr . blockListToDokuWiki opts)
- (inlineListToDokuWiki opts)
- meta
- body <- blockListToDokuWiki opts blocks
- notesExist <- stNotes <$> get
- let notes = if notesExist
- then "" -- TODO Was "\n<references />" Check whether I can really remove this:
- -- if it is definitely to do with footnotes, can remove this whole bit
- else ""
- let main = body ++ notes
- let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Escape special characters for DokuWiki.
-escapeString :: String -> String
-escapeString = substitute "__" "%%__%%" .
- substitute "**" "%%**%%" .
- substitute "//" "%%//%%"
-
--- | Convert Pandoc block element to DokuWiki.
-blockToDokuWiki :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> DokuWiki String
-
-blockToDokuWiki _ Null = return ""
-
-blockToDokuWiki opts (Div _attrs bs) = do
- contents <- blockListToDokuWiki opts bs
- return $ contents ++ "\n"
-
-blockToDokuWiki opts (Plain inlines) =
- inlineListToDokuWiki opts inlines
-
--- title beginning with fig: indicates that the image is a figure
--- dokuwiki doesn't support captions - so combine together alt and caption into alt
-blockToDokuWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- if null txt
- then return ""
- else (" " ++) `fmap` inlineListToDokuWiki opts txt
- let opt = if null txt
- then ""
- else "|" ++ if null tit then capt else tit ++ capt
- -- Relative links fail isURI and receive a colon
- prefix = if isURI src then "" else ":"
- return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n"
-
-blockToDokuWiki opts (Para inlines) = do
- indent <- stIndent <$> ask
- useTags <- stUseTags <$> ask
- contents <- inlineListToDokuWiki opts inlines
- return $ if useTags
- then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>"
- else contents ++ if null indent then "\n" else ""
-
-blockToDokuWiki opts (LineBlock lns) =
- blockToDokuWiki opts $ linesToPara lns
-
-blockToDokuWiki _ (RawBlock f str)
- | f == Format "dokuwiki" = return str
- -- See https://www.dokuwiki.org/wiki:syntax
- -- use uppercase HTML tag for block-level content:
- | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>"
- | otherwise = return ""
-
-blockToDokuWiki _ HorizontalRule = return "\n----\n"
-
-blockToDokuWiki opts (Header level _ inlines) = do
- -- emphasis, links etc. not allowed in headers, apparently,
- -- so we remove formatting:
- contents <- inlineListToDokuWiki opts $ removeFormatting inlines
- let eqs = replicate ( 7 - level ) '='
- return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
-
-blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
- let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
- "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
- "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
- "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
- "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
- "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
- "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
- "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
- "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
- return $ "<code" ++
- (case at of
- [] -> ">\n"
- (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>"
-
-blockToDokuWiki opts (BlockQuote blocks) = do
- contents <- blockListToDokuWiki opts blocks
- if isSimpleBlockQuote blocks
- then return $ unlines $ map ("> " ++) $ lines contents
- else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>"
-
-blockToDokuWiki opts (Table capt aligns _ headers rows) = do
- captionDoc <- if null capt
- then return ""
- else do
- c <- inlineListToDokuWiki opts capt
- return $ "" ++ c ++ "\n"
- headers' <- if all null headers
- then return []
- else zipWithM (tableItemToDokuWiki opts) aligns headers
- rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
- let widths = map (maximum . map length) $ transpose (headers':rows')
- let padTo (width, al) s =
- case (width - length s) of
- x | x > 0 ->
- if al == AlignLeft || al == AlignDefault
- then s ++ replicate x ' '
- else if al == AlignRight
- then replicate x ' ' ++ s
- else replicate (x `div` 2) ' ' ++
- s ++ replicate (x - x `div` 2) ' '
- | otherwise -> s
- let renderRow sep cells = sep ++
- intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep
- return $ captionDoc ++
- (if null headers' then "" else renderRow "^" headers' ++ "\n") ++
- unlines (map (renderRow "|") rows')
-
-blockToDokuWiki opts x@(BulletList items) = do
- oldUseTags <- stUseTags <$> ask
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
- then do
- contents <- local (\s -> s { stUseTags = True })
- (mapM (listItemToDokuWiki opts) items)
- return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n"
- else do
- contents <- local (\s -> s { stIndent = stIndent s ++ " "
- , stBackSlashLB = backSlash})
- (mapM (listItemToDokuWiki opts) items)
- return $ vcat contents ++ if null indent then "\n" else ""
-
-blockToDokuWiki opts x@(OrderedList attribs items) = do
- oldUseTags <- stUseTags <$> ask
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
- then do
- contents <- local (\s -> s { stUseTags = True })
- (mapM (orderedListItemToDokuWiki opts) items)
- return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n"
- else do
- contents <- local (\s -> s { stIndent = stIndent s ++ " "
- , stBackSlashLB = backSlash})
- (mapM (orderedListItemToDokuWiki opts) items)
- return $ vcat contents ++ if null indent then "\n" else ""
-
--- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there
--- is a specific representation of them.
--- TODO This creates double '; ; ' if there is a bullet or ordered list inside a definition list
-blockToDokuWiki opts x@(DefinitionList items) = do
- oldUseTags <- stUseTags <$> ask
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
- then do
- contents <- local (\s -> s { stUseTags = True })
- (mapM (definitionListItemToDokuWiki opts) items)
- return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n"
- else do
- contents <- local (\s -> s { stIndent = stIndent s ++ " "
- , stBackSlashLB = backSlash})
- (mapM (definitionListItemToDokuWiki opts) items)
- return $ vcat contents ++ if null indent then "\n" else ""
-
--- Auxiliary functions for lists:
-
--- | Convert ordered list attributes to HTML attribute string
-listAttribsToString :: ListAttributes -> String
-listAttribsToString (startnum, numstyle, _) =
- let numstyle' = camelCaseToHyphenated $ show numstyle
- in (if startnum /= 1
- then " start=\"" ++ show startnum ++ "\""
- else "") ++
- (if numstyle /= DefaultStyle
- then " style=\"list-style-type: " ++ numstyle' ++ ";\""
- else "")
-
--- | Convert bullet list item (list of blocks) to DokuWiki.
-listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
-listItemToDokuWiki opts items = do
- contents <- blockListToDokuWiki opts items
- useTags <- stUseTags <$> ask
- if useTags
- then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
- else do
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
- return $ indent' ++ "* " ++ contents
-
--- | Convert ordered list item (list of blocks) to DokuWiki.
--- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki
-orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
-orderedListItemToDokuWiki opts items = do
- contents <- blockListToDokuWiki opts items
- useTags <- stUseTags <$> ask
- if useTags
- then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
- else do
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
- return $ indent' ++ "- " ++ contents
-
--- | Convert definition list item (label, list of blocks) to DokuWiki.
-definitionListItemToDokuWiki :: WriterOptions
- -> ([Inline],[[Block]])
- -> DokuWiki String
-definitionListItemToDokuWiki opts (label, items) = do
- labelText <- inlineListToDokuWiki opts label
- contents <- mapM (blockListToDokuWiki opts) items
- useTags <- stUseTags <$> ask
- if useTags
- then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++
- (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents)
- else do
- indent <- stIndent <$> ask
- backSlash <- stBackSlashLB <$> ask
- let indent' = if backSlash then (drop 2 indent) else indent
- return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents
-
--- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
-isSimpleList :: Block -> Bool
-isSimpleList x =
- case x of
- BulletList items -> all isSimpleListItem items
- OrderedList (num, sty, _) items -> all isSimpleListItem items &&
- num == 1 && sty `elem` [DefaultStyle, Decimal]
- DefinitionList items -> all isSimpleListItem $ concatMap snd items
- _ -> False
-
--- | True if list item can be handled with the simple wiki syntax. False if
--- HTML tags will be needed.
-isSimpleListItem :: [Block] -> Bool
-isSimpleListItem [] = True
-isSimpleListItem [x] =
- case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- DefinitionList _ -> isSimpleList x
- _ -> False
-isSimpleListItem [x, y] | isPlainOrPara x =
- case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- DefinitionList _ -> isSimpleList y
- _ -> False
-isSimpleListItem _ = False
-
-isPlainOrPara :: Block -> Bool
-isPlainOrPara (Plain _) = True
-isPlainOrPara (Para _) = True
-isPlainOrPara _ = False
-
-isSimpleBlockQuote :: [Block] -> Bool
-isSimpleBlockQuote bs = all isPlainOrPara bs
-
--- | Concatenates strings with line breaks between them.
-vcat :: [String] -> String
-vcat = intercalate "\n"
-
-backSlashLineBreaks :: String -> String
-backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs
- where f '\n' = "\\\\ "
- f c = [c]
- g (' ' : '\\':'\\': xs) = xs
- g s = s
-
--- Auxiliary functions for tables:
-
-tableItemToDokuWiki :: WriterOptions
- -> Alignment
- -> [Block]
- -> DokuWiki String
-tableItemToDokuWiki opts align' item = do
- let mkcell x = (if align' == AlignRight || align' == AlignCenter
- then " "
- else "") ++ x ++
- (if align' == AlignLeft || align' == AlignCenter
- then " "
- else "")
- contents <- local (\s -> s { stBackSlashLB = True }) $
- blockListToDokuWiki opts item
- return $ mkcell contents
-
--- | Convert list of Pandoc block elements to DokuWiki.
-blockListToDokuWiki :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> DokuWiki String
-blockListToDokuWiki opts blocks = do
- backSlash <- stBackSlashLB <$> ask
- let blocks' = consolidateRawBlocks blocks
- if backSlash
- then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks'
- else vcat <$> mapM (blockToDokuWiki opts) blocks'
-
-consolidateRawBlocks :: [Block] -> [Block]
-consolidateRawBlocks [] = []
-consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs)
- | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs)
-consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs
-
--- | Convert list of Pandoc inline elements to DokuWiki.
-inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String
-inlineListToDokuWiki opts lst =
- concat <$> (mapM (inlineToDokuWiki opts) lst)
-
--- | Convert Pandoc inline element to DokuWiki.
-inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String
-
-inlineToDokuWiki opts (Span _attrs ils) =
- inlineListToDokuWiki opts ils
-
-inlineToDokuWiki opts (Emph lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "//" ++ contents ++ "//"
-
-inlineToDokuWiki opts (Strong lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "**" ++ contents ++ "**"
-
-inlineToDokuWiki opts (Strikeout lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "<del>" ++ contents ++ "</del>"
-
-inlineToDokuWiki opts (Superscript lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "<sup>" ++ contents ++ "</sup>"
-
-inlineToDokuWiki opts (Subscript lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "<sub>" ++ contents ++ "</sub>"
-
-inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst
-
-inlineToDokuWiki opts (Quoted SingleQuote lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "\8216" ++ contents ++ "\8217"
-
-inlineToDokuWiki opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToDokuWiki opts lst
- return $ "\8220" ++ contents ++ "\8221"
-
-inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst
-
-inlineToDokuWiki _ (Code _ str) =
- -- In dokuwiki, text surrounded by '' is really just a font statement, i.e. <tt>,
- -- and so other formatting can be present inside.
- -- However, in pandoc, and markdown, inlined code doesn't contain formatting.
- -- So I have opted for using %% to disable all formatting inside inline code blocks.
- -- This gives the best results when converting from other formats to dokuwiki, even if
- -- the resultand code is a little ugly, for short strings that don't contain formatting
- -- characters.
- -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format,
- -- any formatting inside inlined code blocks would be lost, or presented incorrectly.
- return $ "''%%" ++ str ++ "%%''"
-
-inlineToDokuWiki _ (Str str) = return $ escapeString str
-
-inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim
- -- note: str should NOT be escaped
- where delim = case mathType of
- DisplayMath -> "$$"
- InlineMath -> "$"
-
-inlineToDokuWiki _ (RawInline f str)
- | f == Format "dokuwiki" = return str
- | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
- | otherwise = return ""
-
-inlineToDokuWiki _ LineBreak = return "\\\\\n"
-
-inlineToDokuWiki opts SoftBreak =
- case writerWrapText opts of
- WrapNone -> return " "
- WrapAuto -> return " "
- WrapPreserve -> return "\n"
-
-inlineToDokuWiki _ Space = return " "
-
-inlineToDokuWiki opts (Link _ txt (src, _)) = do
- label <- inlineListToDokuWiki opts txt
- case txt of
- [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
- | escapeURI s == src -> return src
- _ -> if isURI src
- then return $ "[[" ++ src ++ "|" ++ label ++ "]]"
- else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
- where src' = case src of
- '/':xs -> xs -- with leading / it's a
- _ -> src -- link to a help page
-inlineToDokuWiki opts (Image attr alt (source, tit)) = do
- alt' <- inlineListToDokuWiki opts alt
- let txt = case (tit, alt) of
- ("", []) -> ""
- ("", _ ) -> "|" ++ alt'
- (_ , _ ) -> "|" ++ tit
- -- Relative links fail isURI and receive a colon
- prefix = if isURI source then "" else ":"
- return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}"
-
-inlineToDokuWiki opts (Note contents) = do
- contents' <- blockListToDokuWiki opts contents
- modify (\s -> s { stNotes = True })
- return $ "((" ++ contents' ++ "))"
- -- note - may not work for notes with multiple blocks
-
-imageDims :: WriterOptions -> Attr -> String
-imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
- where
- toPx = fmap (showInPixel opts) . checkPct
- checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
- go (Just w) Nothing = "?" ++ w
- go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
- go Nothing (Just h) = "?0x" ++ h
- go Nothing Nothing = ""
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
deleted file mode 100644
index 247014c20..000000000
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ /dev/null
@@ -1,1257 +0,0 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-}
-{-
-Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.EPUB
- Copyright : Copyright (C) 2010-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to EPUB.
--}
-module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
-import Text.Pandoc.Logging
-import qualified Data.Map as M
-import qualified Data.Set as Set
-import Data.Maybe ( fromMaybe, catMaybes )
-import Data.List ( isPrefixOf, isInfixOf, intercalate )
-import Text.Printf (printf)
-import System.FilePath ( takeExtension, takeFileName )
-import Network.HTTP ( urlEncode )
-import qualified Data.ByteString.Lazy as B
-import qualified Data.ByteString.Lazy.Char8 as B8
-import qualified Text.Pandoc.UTF8 as UTF8
-import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
-import Text.Pandoc.Compat.Time
-import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim
- , normalizeDate, stringify
- , hierarchicalize )
-import qualified Text.Pandoc.Shared as S (Element(..))
-import Text.Pandoc.Builder (fromList, setMeta)
-import Text.Pandoc.Options ( WriterOptions(..)
- , WrapOption(..)
- , HTMLMathMethod(..)
- , EPUBVersion(..)
- , ObfuscationMethod(NoObfuscation) )
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk (walk, walkM, query)
-import Text.Pandoc.UUID (getUUID)
-import Control.Monad.State (modify, get, gets, State, StateT, put, evalState, evalStateT, lift)
-import Control.Monad (mplus, when, zipWithM)
-import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
- , strContent, lookupAttr, Node(..), QName(..), parseXML
- , onlyElems, node, ppElement)
-import Text.Pandoc.Writers.HTML ( writeHtmlStringForEPUB )
-import Data.Char ( toLower, isDigit, isAlphaNum )
-import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
-import Control.Monad.Except (throwError, catchError)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import qualified Text.Pandoc.Class as P
-
--- A Chapter includes a list of blocks and maybe a section
--- number offset. Note, some chapters are unnumbered. The section
--- number is different from the index number, which will be used
--- in filenames, chapter0003.xhtml.
-data Chapter = Chapter (Maybe [Int]) [Block]
-
-data EPUBState = EPUBState {
- stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))]
- }
-
-type E m = StateT EPUBState m
-
-data EPUBMetadata = EPUBMetadata{
- epubIdentifier :: [Identifier]
- , epubTitle :: [Title]
- , epubDate :: [Date]
- , epubLanguage :: String
- , epubCreator :: [Creator]
- , epubContributor :: [Creator]
- , epubSubject :: [String]
- , epubDescription :: Maybe String
- , epubType :: Maybe String
- , epubFormat :: Maybe String
- , epubPublisher :: Maybe String
- , epubSource :: Maybe String
- , epubRelation :: Maybe String
- , epubCoverage :: Maybe String
- , epubRights :: Maybe String
- , epubCoverImage :: Maybe String
- , epubStylesheet :: Maybe Stylesheet
- , epubPageDirection :: Maybe ProgressionDirection
- } deriving Show
-
-data Stylesheet = StylesheetPath FilePath
- | StylesheetContents String
- deriving Show
-
-data Date = Date{
- dateText :: String
- , dateEvent :: Maybe String
- } deriving Show
-
-data Creator = Creator{
- creatorText :: String
- , creatorRole :: Maybe String
- , creatorFileAs :: Maybe String
- } deriving Show
-
-data Identifier = Identifier{
- identifierText :: String
- , identifierScheme :: Maybe String
- } deriving Show
-
-data Title = Title{
- titleText :: String
- , titleFileAs :: Maybe String
- , titleType :: Maybe String
- } deriving Show
-
-data ProgressionDirection = LTR | RTL deriving Show
-
-dcName :: String -> QName
-dcName n = QName n Nothing (Just "dc")
-
-dcNode :: Node t => String -> t -> Element
-dcNode = node . dcName
-
-opfName :: String -> QName
-opfName n = QName n Nothing (Just "opf")
-
-toId :: FilePath -> String
-toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
- then x
- else '_') . takeFileName
-
-removeNote :: Inline -> Inline
-removeNote (Note _) = Str ""
-removeNote x = x
-
-getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
-getEPUBMetadata opts meta = do
- let md = metadataFromMeta opts meta
- let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
- let md' = foldr addMetadataFromXML md elts
- let addIdentifier m =
- if null (epubIdentifier m)
- then do
- randomId <- (show . getUUID) <$> lift P.newStdGen
- return $ m{ epubIdentifier = [Identifier randomId Nothing] }
- else return m
- let addLanguage m =
- if null (epubLanguage m)
- then case lookup "lang" (writerVariables opts) of
- Just x -> return m{ epubLanguage = x }
- Nothing -> do
- mLang <- lift $ P.lookupEnv "LANG"
- let localeLang =
- case mLang of
- Just lang ->
- map (\c -> if c == '_' then '-' else c) $
- takeWhile (/='.') lang
- Nothing -> "en-US"
- return m{ epubLanguage = localeLang }
- else return m
- let fixDate m =
- if null (epubDate m)
- then do
- currentTime <- lift P.getCurrentTime
- return $ m{ epubDate = [ Date{
- dateText = showDateTimeISO8601 currentTime
- , dateEvent = Nothing } ] }
- else return m
- let addAuthor m =
- if any (\c -> creatorRole c == Just "aut") $ epubCreator m
- then return m
- else do
- let authors' = map stringify $ docAuthors meta
- let toAuthor name = Creator{ creatorText = name
- , creatorRole = Just "aut"
- , creatorFileAs = Nothing }
- return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
- addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage
-
-addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata
-addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
- | name == "identifier" = md{ epubIdentifier =
- Identifier{ identifierText = strContent e
- , identifierScheme = lookupAttr (opfName "scheme") attrs
- } : epubIdentifier md }
- | name == "title" = md{ epubTitle =
- Title{ titleText = strContent e
- , titleFileAs = getAttr "file-as"
- , titleType = getAttr "type"
- } : epubTitle md }
- | name == "date" = md{ epubDate =
- Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e
- , dateEvent = getAttr "event"
- } : epubDate md }
- | name == "language" = md{ epubLanguage = strContent e }
- | name == "creator" = md{ epubCreator =
- Creator{ creatorText = strContent e
- , creatorRole = getAttr "role"
- , creatorFileAs = getAttr "file-as"
- } : epubCreator md }
- | name == "contributor" = md{ epubContributor =
- Creator { creatorText = strContent e
- , creatorRole = getAttr "role"
- , creatorFileAs = getAttr "file-as"
- } : epubContributor md }
- | name == "subject" = md{ epubSubject = strContent e : epubSubject md }
- | name == "description" = md { epubDescription = Just $ strContent e }
- | name == "type" = md { epubType = Just $ strContent e }
- | name == "format" = md { epubFormat = Just $ strContent e }
- | name == "type" = md { epubType = Just $ strContent e }
- | name == "publisher" = md { epubPublisher = Just $ strContent e }
- | name == "source" = md { epubSource = Just $ strContent e }
- | name == "relation" = md { epubRelation = Just $ strContent e }
- | name == "coverage" = md { epubCoverage = Just $ strContent e }
- | name == "rights" = md { epubRights = Just $ strContent e }
- | otherwise = md
- where getAttr n = lookupAttr (opfName n) attrs
-addMetadataFromXML _ md = md
-
-metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s) = s
-metaValueToString (MetaInlines ils) = stringify ils
-metaValueToString (MetaBlocks bs) = stringify bs
-metaValueToString (MetaBool True) = "true"
-metaValueToString (MetaBool False) = "false"
-metaValueToString _ = ""
-
-getList :: String -> Meta -> (MetaValue -> a) -> [a]
-getList s meta handleMetaValue =
- case lookupMeta s meta of
- Just (MetaList xs) -> map handleMetaValue xs
- Just mv -> [handleMetaValue mv]
- Nothing -> []
-
-getIdentifier :: Meta -> [Identifier]
-getIdentifier meta = getList "identifier" meta handleMetaValue
- where handleMetaValue (MetaMap m) =
- Identifier{ identifierText = maybe "" metaValueToString
- $ M.lookup "text" m
- , identifierScheme = metaValueToString <$>
- M.lookup "scheme" m }
- handleMetaValue mv = Identifier (metaValueToString mv) Nothing
-
-getTitle :: Meta -> [Title]
-getTitle meta = getList "title" meta handleMetaValue
- where handleMetaValue (MetaMap m) =
- Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m
- , titleFileAs = metaValueToString <$> M.lookup "file-as" m
- , titleType = metaValueToString <$> M.lookup "type" m }
- handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
-
-getCreator :: String -> Meta -> [Creator]
-getCreator s meta = getList s meta handleMetaValue
- where handleMetaValue (MetaMap m) =
- Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
- , creatorFileAs = metaValueToString <$> M.lookup "file-as" m
- , creatorRole = metaValueToString <$> M.lookup "role" m }
- handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
-
-getDate :: String -> Meta -> [Date]
-getDate s meta = getList s meta handleMetaValue
- where handleMetaValue (MetaMap m) =
- Date{ dateText = maybe "" id $
- M.lookup "text" m >>= normalizeDate' . metaValueToString
- , dateEvent = metaValueToString <$> M.lookup "event" m }
- handleMetaValue mv = Date { dateText = maybe ""
- id $ normalizeDate' $ metaValueToString mv
- , dateEvent = Nothing }
-
-simpleList :: String -> Meta -> [String]
-simpleList s meta =
- case lookupMeta s meta of
- Just (MetaList xs) -> map metaValueToString xs
- Just x -> [metaValueToString x]
- Nothing -> []
-
-metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
-metadataFromMeta opts meta = EPUBMetadata{
- epubIdentifier = identifiers
- , epubTitle = titles
- , epubDate = date
- , epubLanguage = language
- , epubCreator = creators
- , epubContributor = contributors
- , epubSubject = subjects
- , epubDescription = description
- , epubType = epubtype
- , epubFormat = format
- , epubPublisher = publisher
- , epubSource = source
- , epubRelation = relation
- , epubCoverage = coverage
- , epubRights = rights
- , epubCoverImage = coverImage
- , epubStylesheet = stylesheet
- , epubPageDirection = pageDirection
- }
- where identifiers = getIdentifier meta
- titles = getTitle meta
- date = getDate "date" meta
- language = maybe "" metaValueToString $
- lookupMeta "language" meta `mplus` lookupMeta "lang" meta
- creators = getCreator "creator" meta
- contributors = getCreator "contributor" meta
- subjects = simpleList "subject" meta
- description = metaValueToString <$> lookupMeta "description" meta
- epubtype = metaValueToString <$> lookupMeta "type" meta
- format = metaValueToString <$> lookupMeta "format" meta
- publisher = metaValueToString <$> lookupMeta "publisher" meta
- source = metaValueToString <$> lookupMeta "source" meta
- relation = metaValueToString <$> lookupMeta "relation" meta
- coverage = metaValueToString <$> lookupMeta "coverage" meta
- rights = metaValueToString <$> lookupMeta "rights" meta
- coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
- (metaValueToString <$> lookupMeta "cover-image" meta)
- stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
- ((StylesheetPath . metaValueToString) <$>
- lookupMeta "stylesheet" meta)
- pageDirection = case map toLower . metaValueToString <$>
- lookupMeta "page-progression-direction" meta of
- Just "ltr" -> Just LTR
- Just "rtl" -> Just RTL
- _ -> Nothing
-
--- | Produce an EPUB2 file from a Pandoc document.
-writeEPUB2 :: PandocMonad m
- => WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> m B.ByteString
-writeEPUB2 = writeEPUB EPUB2
-
--- | Produce an EPUB3 file from a Pandoc document.
-writeEPUB3 :: PandocMonad m
- => WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> m B.ByteString
-writeEPUB3 = writeEPUB EPUB3
-
--- | Produce an EPUB file from a Pandoc document.
-writeEPUB :: PandocMonad m
- => EPUBVersion
- -> WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> m B.ByteString
-writeEPUB epubVersion opts doc =
- let initState = EPUBState { stMediaPaths = []
- }
- in
- evalStateT (pandocToEPUB epubVersion opts doc)
- initState
-
-pandocToEPUB :: PandocMonad m
- => EPUBVersion
- -> WriterOptions
- -> Pandoc
- -> E m B.ByteString
-pandocToEPUB version opts doc@(Pandoc meta _) = do
- let epub3 = version == EPUB3
- let writeHtml o = fmap UTF8.fromStringLazy .
- writeHtmlStringForEPUB version o
- epochtime <- floor <$> lift P.getPOSIXTime
- let mkEntry path content = toEntry path epochtime content
- let vars = ("epub3", if epub3 then "true" else "false")
- : ("css", "stylesheet.css")
- : writerVariables opts
- let opts' = opts{ writerEmailObfuscation = NoObfuscation
- , writerSectionDivs = True
- , writerVariables = vars
- , writerHTMLMathMethod =
- if epub3
- then MathML
- else writerHTMLMathMethod opts
- , writerWrapText = WrapAuto }
- metadata <- getEPUBMetadata opts' meta
-
- -- cover page
- (cpgEntry, cpicEntry) <-
- case epubCoverImage metadata of
- Nothing -> return ([],[])
- Just img -> do
- let coverImage = "media/" ++ takeFileName img
- cpContent <- lift $ writeHtml
- opts'{ writerVariables = ("coverpage","true"):vars }
- (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
- imgContent <- lift $ P.readFileLazy img
- return ( [mkEntry "cover.xhtml" cpContent]
- , [mkEntry coverImage imgContent] )
-
- -- title page
- tpContent <- lift $ writeHtml opts'{
- writerVariables = ("titlepage","true"):vars }
- (Pandoc meta [])
- let tpEntry = mkEntry "title_page.xhtml" tpContent
-
- -- handle pictures
- -- mediaRef <- P.newIORef []
- Pandoc _ blocks <- walkM (transformInline opts') doc >>=
- walkM (transformBlock opts')
- picEntries <- (catMaybes . map (snd . snd)) <$> (gets stMediaPaths)
- -- handle fonts
- let matchingGlob f = do
- xs <- lift $ P.glob f
- when (null xs) $
- report $ CouldNotFetchResource f "glob did not match any font files"
- return xs
- let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f)
- fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
- fontEntries <- mapM mkFontEntry fontFiles
-
- -- set page progression direction attribution
- let progressionDirection = case epubPageDirection metadata of
- Just LTR | epub3 ->
- [("page-progression-direction", "ltr")]
- Just RTL | epub3 ->
- [("page-progression-direction", "rtl")]
- _ -> []
-
- -- body pages
-
- -- add level 1 header to beginning if none there
- let blocks' = addIdentifiers
- $ case blocks of
- (Header 1 _ _ : _) -> blocks
- _ -> Header 1 ("",["unnumbered"],[])
- (docTitle' meta) : blocks
-
- let chapterHeaderLevel = writerEpubChapterLevel opts
-
- let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
- isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
- n <= chapterHeaderLevel
- isChapterHeader _ = False
-
- let toChapters :: [Block] -> State [Int] [Chapter]
- toChapters [] = return []
- toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) =
- toChapters (bs ++ rest)
- toChapters (Header n attr@(_,classes,_) ils : bs) = do
- nums <- get
- mbnum <- if "unnumbered" `elem` classes
- then return Nothing
- else case splitAt (n - 1) nums of
- (ks, (m:_)) -> do
- let nums' = ks ++ [m+1]
- put nums'
- return $ Just (ks ++ [m])
- -- note, this is the offset not the sec number
- (ks, []) -> do
- let nums' = ks ++ [1]
- put nums'
- return $ Just ks
- let (xs,ys) = break isChapterHeader bs
- (Chapter mbnum (Header n attr ils : xs) :) `fmap` toChapters ys
- toChapters (b:bs) = do
- let (xs,ys) = break isChapterHeader bs
- (Chapter Nothing (b:xs) :) `fmap` toChapters ys
-
- let chapters' = evalState (toChapters blocks') []
-
- let extractLinkURL' :: Int -> Inline -> [(String, String)]
- extractLinkURL' num (Span (ident, _, _) _)
- | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
- extractLinkURL' _ _ = []
-
- let extractLinkURL :: Int -> Block -> [(String, String)]
- extractLinkURL num (Div (ident, _, _) _)
- | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
- extractLinkURL num (Header _ (ident, _, _) _)
- | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
- extractLinkURL num b = query (extractLinkURL' num) b
-
- let reftable = concat $ zipWith (\(Chapter _ bs) num ->
- query (extractLinkURL num) bs)
- chapters' [1..]
-
- let fixInternalReferences :: Inline -> Inline
- fixInternalReferences (Link attr lab ('#':xs, tit)) =
- case lookup xs reftable of
- Just ys -> Link attr lab (ys, tit)
- Nothing -> Link attr lab ('#':xs, tit)
- fixInternalReferences x = x
-
- -- internal reference IDs change when we chunk the file,
- -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
- -- this fixes that:
- let chapters = map (\(Chapter mbnum bs) ->
- Chapter mbnum $ walk fixInternalReferences bs)
- chapters'
-
- let chapToEntry num (Chapter mbnum bs) =
- mkEntry (showChapter num) <$>
- (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum }
- $ case bs of
- (Header _ _ xs : _) ->
- -- remove notes or we get doubled footnotes
- Pandoc (setMeta "title" (walk removeNote $ fromList xs)
- nullMeta) bs
- _ ->
- Pandoc nullMeta bs)
-
- chapterEntries <- lift $ zipWithM chapToEntry [1..] chapters
-
- -- incredibly inefficient (TODO):
- let containsMathML ent = epub3 &&
- "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
- let containsSVG ent = epub3 &&
- "<svg" `isInfixOf` (B8.unpack $ fromEntry ent)
- let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
-
- -- contents.opf
- let chapterNode ent = unode "item" !
- ([("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
- ("media-type", "application/xhtml+xml")]
- ++ case props ent of
- [] -> []
- xs -> [("properties", unwords xs)])
- $ ()
- let chapterRefNode ent = unode "itemref" !
- [("idref", toId $ eRelativePath ent)] $ ()
- let pictureNode ent = unode "item" !
- [("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
- ("media-type", fromMaybe "application/octet-stream"
- $ mediaTypeOf $ eRelativePath ent)] $ ()
- let fontNode ent = unode "item" !
- [("id", toId $ eRelativePath ent),
- ("href", eRelativePath ent),
- ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
- let plainTitle = case docTitle' meta of
- [] -> case epubTitle metadata of
- [] -> "UNTITLED"
- (x:_) -> titleText x
- x -> stringify x
-
- let tocTitle = fromMaybe plainTitle $
- metaValueToString <$> lookupMeta "toc-title" meta
- uuid <- case epubIdentifier metadata of
- (x:_) -> return $ identifierText x -- use first identifier as UUID
- [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen
- currentTime <- lift $ P.getCurrentTime
- let contentsData = UTF8.fromStringLazy $ ppTopElement $
- unode "package" ! [("version", case version of
- EPUB2 -> "2.0"
- EPUB3 -> "3.0")
- ,("xmlns","http://www.idpf.org/2007/opf")
- ,("unique-identifier","epub-id-1")] $
- [ metadataElement version metadata currentTime
- , unode "manifest" $
- [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
- ,("media-type","application/x-dtbncx+xml")] $ ()
- , unode "item" ! [("id","style"), ("href","stylesheet.css")
- ,("media-type","text/css")] $ ()
- , unode "item" ! ([("id","nav")
- ,("href","nav.xhtml")
- ,("media-type","application/xhtml+xml")] ++
- [("properties","nav") | epub3 ]) $ ()
- ] ++
- map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
- (case cpicEntry of
- [] -> []
- (x:_) -> [add_attrs
- [Attr (unqual "properties") "cover-image" | epub3]
- (pictureNode x)]) ++
- map pictureNode picEntries ++
- map fontNode fontEntries
- , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $
- case epubCoverImage metadata of
- Nothing -> []
- Just _ -> [ unode "itemref" !
- [("idref", "cover_xhtml")] $ () ]
- ++ ((unode "itemref" ! [("idref", "title_page_xhtml")
- ,("linear",
- case lookupMeta "title" meta of
- Just _ -> "yes"
- Nothing -> "no")] $ ()) :
- [unode "itemref" ! [("idref", "nav")] $ ()
- | writerTableOfContents opts ] ++
- map chapterRefNode chapterEntries)
- , unode "guide" $
- [ unode "reference" !
- [("type","toc"),("title", tocTitle),
- ("href","nav.xhtml")] $ ()
- ] ++
- [ unode "reference" !
- [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing
- ]
- ]
- let contentsEntry = mkEntry "content.opf" contentsData
-
- -- toc.ncx
- let secs = hierarchicalize blocks'
-
- let tocLevel = writerTOCDepth opts
-
- let navPointNode :: PandocMonad m
- => (Int -> String -> String -> [Element] -> Element)
- -> S.Element -> StateT Int m Element
- navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
- n <- get
- modify (+1)
- let showNums :: [Int] -> String
- showNums = intercalate "." . map show
- let tit' = stringify ils
- let tit = if writerNumberSections opts && not (null nums)
- then showNums nums ++ " " ++ tit'
- else tit'
- src <- case lookup ident reftable of
- Just x -> return x
- Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable"
- let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
- isSec _ = False
- let subsecs = filter isSec children
- subs <- mapM (navPointNode formatter) subsecs
- return $ formatter n tit src subs
- navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk"
-
- let navMapFormatter :: Int -> String -> String -> [Element] -> Element
- navMapFormatter n tit src subs = unode "navPoint" !
- [("id", "navPoint-" ++ show n)] $
- [ unode "navLabel" $ unode "text" tit
- , unode "content" ! [("src", src)] $ ()
- ] ++ subs
-
- let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
- [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
- , unode "content" ! [("src","title_page.xhtml")] $ () ]
-
- navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1
- let tocData = UTF8.fromStringLazy $ ppTopElement $
- unode "ncx" ! [("version","2005-1")
- ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
- [ unode "head" $
- [ unode "meta" ! [("name","dtb:uid")
- ,("content", uuid)] $ ()
- , unode "meta" ! [("name","dtb:depth")
- ,("content", "1")] $ ()
- , unode "meta" ! [("name","dtb:totalPageCount")
- ,("content", "0")] $ ()
- , unode "meta" ! [("name","dtb:maxPageNumber")
- ,("content", "0")] $ ()
- ] ++ case epubCoverImage metadata of
- Nothing -> []
- Just img -> [unode "meta" ! [("name","cover"),
- ("content", toId img)] $ ()]
- , unode "docTitle" $ unode "text" $ plainTitle
- , unode "navMap" $
- tpNode : navMap
- ]
- let tocEntry = mkEntry "toc.ncx" tocData
-
- let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
- navXhtmlFormatter n tit src subs = unode "li" !
- [("id", "toc-li-" ++ show n)] $
- (unode "a" ! [("href",src)]
- $ tit)
- : case subs of
- [] -> []
- (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-
- let navtag = if epub3 then "nav" else "div"
- tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1
- let navBlocks = [RawBlock (Format "html") $ ppElement $
- unode navtag ! ([("epub:type","toc") | epub3] ++
- [("id","toc")]) $
- [ unode "h1" ! [("id","toc-title")] $ tocTitle
- , unode "ol" ! [("class","toc")] $ tocBlocks ]]
- let landmarks = if epub3
- then [RawBlock (Format "html") $ ppElement $
- unode "nav" ! [("epub:type","landmarks")
- ,("hidden","hidden")] $
- [ unode "ol" $
- [ unode "li"
- [ unode "a" ! [("href", "cover.xhtml")
- ,("epub:type", "cover")] $
- "Cover"] |
- epubCoverImage metadata /= Nothing
- ] ++
- [ unode "li"
- [ unode "a" ! [("href", "#toc")
- ,("epub:type", "toc")] $
- "Table of contents"
- ] | writerTableOfContents opts
- ]
- ]
- ]
- else []
- navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars }
- (Pandoc (setMeta "title"
- (walk removeNote $ fromList $ docTitle' meta) nullMeta)
- (navBlocks ++ landmarks))
- let navEntry = mkEntry "nav.xhtml" navData
-
- -- mimetype
- let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
-
- -- container.xml
- let containerData = UTF8.fromStringLazy $ ppTopElement $
- unode "container" ! [("version","1.0")
- ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
- unode "rootfiles" $
- unode "rootfile" ! [("full-path","content.opf")
- ,("media-type","application/oebps-package+xml")] $ ()
- let containerEntry = mkEntry "META-INF/container.xml" containerData
-
- -- com.apple.ibooks.display-options.xml
- let apple = UTF8.fromStringLazy $ ppTopElement $
- unode "display_options" $
- unode "platform" ! [("name","*")] $
- unode "option" ! [("name","specified-fonts")] $ "true"
- let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
-
- -- stylesheet
- stylesheet <- case epubStylesheet metadata of
- Just (StylesheetPath fp) -> UTF8.toStringLazy <$> (lift $ P.readFileLazy fp)
- Just (StylesheetContents s) -> return s
- Nothing -> UTF8.toString `fmap`
- (lift $ P.readDataFile (writerUserDataDir opts) "epub.css")
- let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
-
- -- construct archive
- let archive = foldr addEntryToArchive emptyArchive
- (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
- contentsEntry : tocEntry : navEntry :
- (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
- return $ fromArchive archive
-
-metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
-metadataElement version md currentTime =
- unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
- ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
- where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
- ++ creatorNodes ++ contributorNodes ++ subjectNodes
- ++ descriptionNodes ++ typeNodes ++ formatNodes
- ++ publisherNodes ++ sourceNodes ++ relationNodes
- ++ coverageNodes ++ rightsNodes ++ coverImageNodes
- ++ modifiedNodes
- withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
- ([1..] :: [Int]))
- identifierNodes = withIds "epub-id" toIdentifierNode $
- epubIdentifier md
- titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
- dateNodes = if version == EPUB2
- then withIds "epub-date" toDateNode $ epubDate md
- else -- epub3 allows only one dc:date
- -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate
- case epubDate md of
- [] -> []
- (x:_) -> [dcNode "date" ! [("id","epub-date")]
- $ dateText x]
- languageNodes = [dcTag "language" $ epubLanguage md]
- creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
- epubCreator md
- contributorNodes = withIds "epub-contributor"
- (toCreatorNode "contributor") $ epubContributor md
- subjectNodes = map (dcTag "subject") $ epubSubject md
- descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
- typeNodes = maybe [] (dcTag' "type") $ epubType md
- formatNodes = maybe [] (dcTag' "format") $ epubFormat md
- publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md
- sourceNodes = maybe [] (dcTag' "source") $ epubSource md
- relationNodes = maybe [] (dcTag' "relation") $ epubRelation md
- coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
- rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
- coverImageNodes = maybe []
- (\img -> [unode "meta" ! [("name","cover"),
- ("content",toId img)] $ ()])
- $ epubCoverImage md
- modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
- (showDateTimeISO8601 currentTime) | version == EPUB3 ]
- dcTag n s = unode ("dc:" ++ n) s
- dcTag' n s = [dcTag n s]
- toIdentifierNode id' (Identifier txt scheme)
- | version == EPUB2 = [dcNode "identifier" !
- ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $
- txt]
- | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++
- maybe [] (\x -> [unode "meta" !
- [("refines",'#':id'),("property","identifier-type"),
- ("scheme","onix:codelist5")] $ x])
- (schemeToOnix `fmap` scheme)
- toCreatorNode s id' creator
- | version == EPUB2 = [dcNode s !
- (("id",id') :
- maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
- maybe [] (\x -> [("opf:role",x)])
- (creatorRole creator >>= toRelator)) $ creatorText creator]
- | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
- maybe [] (\x -> [unode "meta" !
- [("refines",'#':id'),("property","file-as")] $ x])
- (creatorFileAs creator) ++
- maybe [] (\x -> [unode "meta" !
- [("refines",'#':id'),("property","role"),
- ("scheme","marc:relators")] $ x])
- (creatorRole creator >>= toRelator)
- toTitleNode id' title
- | version == EPUB2 = [dcNode "title" !
- (("id",id') :
- -- note: EPUB2 doesn't accept opf:title-type
- maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $
- titleText title]
- | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
- ++
- maybe [] (\x -> [unode "meta" !
- [("refines",'#':id'),("property","file-as")] $ x])
- (titleFileAs title) ++
- maybe [] (\x -> [unode "meta" !
- [("refines",'#':id'),("property","title-type")] $ x])
- (titleType title)
- toDateNode id' date = [dcNode "date" !
- (("id",id') :
- maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
- dateText date]
- schemeToOnix "ISBN-10" = "02"
- schemeToOnix "GTIN-13" = "03"
- schemeToOnix "UPC" = "04"
- schemeToOnix "ISMN-10" = "05"
- schemeToOnix "DOI" = "06"
- schemeToOnix "LCCN" = "13"
- schemeToOnix "GTIN-14" = "14"
- schemeToOnix "ISBN-13" = "15"
- schemeToOnix "Legal deposit number" = "17"
- schemeToOnix "URN" = "22"
- schemeToOnix "OCLC" = "23"
- schemeToOnix "ISMN-13" = "25"
- schemeToOnix "ISBN-A" = "26"
- schemeToOnix "JP" = "27"
- schemeToOnix "OLCC" = "28"
- schemeToOnix _ = "01"
-
-showDateTimeISO8601 :: UTCTime -> String
-showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
-
-transformTag :: PandocMonad m
- => WriterOptions
- -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
- -> Tag String
- -> E m (Tag String)
-transformTag opts tag@(TagOpen name attr)
- | name `elem` ["video", "source", "img", "audio"] &&
- lookup "data-external" attr == Nothing = do
- let src = fromAttrib "src" tag
- let poster = fromAttrib "poster" tag
- newsrc <- modifyMediaRef opts src
- newposter <- modifyMediaRef opts poster
- let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
- [("src", newsrc) | not (null newsrc)] ++
- [("poster", newposter) | not (null newposter)]
- return $ TagOpen name attr'
-transformTag _ tag = return tag
-
-modifyMediaRef :: PandocMonad m
- => WriterOptions
- -> FilePath
- -> E m FilePath
-modifyMediaRef _ "" = return ""
-modifyMediaRef opts oldsrc = do
- media <- gets stMediaPaths
- case lookup oldsrc media of
- Just (n,_) -> return n
- Nothing -> catchError
- (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc
- let new = "media/file" ++ show (length media) ++
- fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
- (('.':) <$> (mbMime >>= extensionFromMimeType))
- epochtime <- floor `fmap` lift P.getPOSIXTime
- let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
- modify $ \st -> st{ stMediaPaths =
- (oldsrc, (new, Just entry)):media}
- return new)
- (\e -> do
- report $ CouldNotFetchResource oldsrc (show e)
- return oldsrc)
-
-transformBlock :: PandocMonad m
- => WriterOptions
- -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
- -> Block
- -> E m Block
-transformBlock opts (RawBlock fmt raw)
- | fmt == Format "html" = do
- let tags = parseTags raw
- tags' <- mapM (transformTag opts) tags
- return $ RawBlock fmt (renderTags' tags')
-transformBlock _ b = return b
-
-transformInline :: PandocMonad m
- => WriterOptions
- -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
- -> Inline
- -> E m Inline
-transformInline opts (Image attr lab (src,tit)) = do
- newsrc <- modifyMediaRef opts src
- return $ Image attr lab (newsrc, tit)
-transformInline opts (x@(Math t m))
- | WebTeX url <- writerHTMLMathMethod opts = do
- newsrc <- modifyMediaRef opts (url ++ urlEncode m)
- let mathclass = if t == DisplayMath then "display" else "inline"
- return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")]
-transformInline opts (RawInline fmt raw)
- | fmt == Format "html" = do
- let tags = parseTags raw
- tags' <- mapM (transformTag opts) tags
- return $ RawInline fmt (renderTags' tags')
-transformInline _ x = return x
-
-(!) :: (t -> Element) -> [(String, String)] -> t -> Element
-(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
-
--- | Version of 'ppTopElement' that specifies UTF-8 encoding.
-ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
- -- unEntity removes numeric entities introduced by ppElement
- -- (kindlegen seems to choke on these).
- where unEntity [] = ""
- unEntity ('&':'#':xs) =
- let (ds,ys) = break (==';') xs
- rest = drop 1 ys
- in case safeRead ('\'':'\\':ds ++ "'") of
- Just x -> x : unEntity rest
- Nothing -> '&':'#':unEntity xs
- unEntity (x:xs) = x : unEntity xs
-
-mediaTypeOf :: FilePath -> Maybe MimeType
-mediaTypeOf x =
- let mediaPrefixes = ["image", "video", "audio"] in
- case getMimeType x of
- Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
- _ -> Nothing
-
--- Returns filename for chapter number.
-showChapter :: Int -> String
-showChapter = printf "ch%03d.xhtml"
-
--- Add identifiers to any headers without them.
-addIdentifiers :: [Block] -> [Block]
-addIdentifiers bs = evalState (mapM go bs) Set.empty
- where go (Header n (ident,classes,kvs) ils) = do
- ids <- get
- let ident' = if null ident
- then uniqueIdent ils ids
- else ident
- modify $ Set.insert ident'
- return $ Header n (ident',classes,kvs) ils
- go x = return x
-
--- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
-normalizeDate' :: String -> Maybe String
-normalizeDate' xs =
- let xs' = trim xs in
- case xs' of
- [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY
- [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM
- -> Just xs'
- _ -> normalizeDate xs'
-
-toRelator :: String -> Maybe String
-toRelator x
- | x `elem` relators = Just x
- | otherwise = lookup (map toLower x) relatorMap
-
-relators :: [String]
-relators = map snd relatorMap
-
-relatorMap :: [(String, String)]
-relatorMap =
- [("abridger", "abr")
- ,("actor", "act")
- ,("adapter", "adp")
- ,("addressee", "rcp")
- ,("analyst", "anl")
- ,("animator", "anm")
- ,("annotator", "ann")
- ,("appellant", "apl")
- ,("appellee", "ape")
- ,("applicant", "app")
- ,("architect", "arc")
- ,("arranger", "arr")
- ,("art copyist", "acp")
- ,("art director", "adi")
- ,("artist", "art")
- ,("artistic director", "ard")
- ,("assignee", "asg")
- ,("associated name", "asn")
- ,("attributed name", "att")
- ,("auctioneer", "auc")
- ,("author", "aut")
- ,("author in quotations or text abstracts", "aqt")
- ,("author of afterword, colophon, etc.", "aft")
- ,("author of dialog", "aud")
- ,("author of introduction, etc.", "aui")
- ,("autographer", "ato")
- ,("bibliographic antecedent", "ant")
- ,("binder", "bnd")
- ,("binding designer", "bdd")
- ,("blurb writer", "blw")
- ,("book designer", "bkd")
- ,("book producer", "bkp")
- ,("bookjacket designer", "bjd")
- ,("bookplate designer", "bpd")
- ,("bookseller", "bsl")
- ,("braille embosser", "brl")
- ,("broadcaster", "brd")
- ,("calligrapher", "cll")
- ,("cartographer", "ctg")
- ,("caster", "cas")
- ,("censor", "cns")
- ,("choreographer", "chr")
- ,("cinematographer", "cng")
- ,("client", "cli")
- ,("collection registrar", "cor")
- ,("collector", "col")
- ,("collotyper", "clt")
- ,("colorist", "clr")
- ,("commentator", "cmm")
- ,("commentator for written text", "cwt")
- ,("compiler", "com")
- ,("complainant", "cpl")
- ,("complainant-appellant", "cpt")
- ,("complainant-appellee", "cpe")
- ,("composer", "cmp")
- ,("compositor", "cmt")
- ,("conceptor", "ccp")
- ,("conductor", "cnd")
- ,("conservator", "con")
- ,("consultant", "csl")
- ,("consultant to a project", "csp")
- ,("contestant", "cos")
- ,("contestant-appellant", "cot")
- ,("contestant-appellee", "coe")
- ,("contestee", "cts")
- ,("contestee-appellant", "ctt")
- ,("contestee-appellee", "cte")
- ,("contractor", "ctr")
- ,("contributor", "ctb")
- ,("copyright claimant", "cpc")
- ,("copyright holder", "cph")
- ,("corrector", "crr")
- ,("correspondent", "crp")
- ,("costume designer", "cst")
- ,("court governed", "cou")
- ,("court reporter", "crt")
- ,("cover designer", "cov")
- ,("creator", "cre")
- ,("curator", "cur")
- ,("dancer", "dnc")
- ,("data contributor", "dtc")
- ,("data manager", "dtm")
- ,("dedicatee", "dte")
- ,("dedicator", "dto")
- ,("defendant", "dfd")
- ,("defendant-appellant", "dft")
- ,("defendant-appellee", "dfe")
- ,("degree granting institution", "dgg")
- ,("delineator", "dln")
- ,("depicted", "dpc")
- ,("depositor", "dpt")
- ,("designer", "dsr")
- ,("director", "drt")
- ,("dissertant", "dis")
- ,("distribution place", "dbp")
- ,("distributor", "dst")
- ,("donor", "dnr")
- ,("draftsman", "drm")
- ,("dubious author", "dub")
- ,("editor", "edt")
- ,("editor of compilation", "edc")
- ,("editor of moving image work", "edm")
- ,("electrician", "elg")
- ,("electrotyper", "elt")
- ,("enacting jurisdiction", "enj")
- ,("engineer", "eng")
- ,("engraver", "egr")
- ,("etcher", "etr")
- ,("event place", "evp")
- ,("expert", "exp")
- ,("facsimilist", "fac")
- ,("field director", "fld")
- ,("film director", "fmd")
- ,("film distributor", "fds")
- ,("film editor", "flm")
- ,("film producer", "fmp")
- ,("filmmaker", "fmk")
- ,("first party", "fpy")
- ,("forger", "frg")
- ,("former owner", "fmo")
- ,("funder", "fnd")
- ,("geographic information specialist", "gis")
- ,("honoree", "hnr")
- ,("host", "hst")
- ,("host institution", "his")
- ,("illuminator", "ilu")
- ,("illustrator", "ill")
- ,("inscriber", "ins")
- ,("instrumentalist", "itr")
- ,("interviewee", "ive")
- ,("interviewer", "ivr")
- ,("inventor", "inv")
- ,("issuing body", "isb")
- ,("judge", "jud")
- ,("jurisdiction governed", "jug")
- ,("laboratory", "lbr")
- ,("laboratory director", "ldr")
- ,("landscape architect", "lsa")
- ,("lead", "led")
- ,("lender", "len")
- ,("libelant", "lil")
- ,("libelant-appellant", "lit")
- ,("libelant-appellee", "lie")
- ,("libelee", "lel")
- ,("libelee-appellant", "let")
- ,("libelee-appellee", "lee")
- ,("librettist", "lbt")
- ,("licensee", "lse")
- ,("licensor", "lso")
- ,("lighting designer", "lgd")
- ,("lithographer", "ltg")
- ,("lyricist", "lyr")
- ,("manufacture place", "mfp")
- ,("manufacturer", "mfr")
- ,("marbler", "mrb")
- ,("markup editor", "mrk")
- ,("metadata contact", "mdc")
- ,("metal-engraver", "mte")
- ,("moderator", "mod")
- ,("monitor", "mon")
- ,("music copyist", "mcp")
- ,("musical director", "msd")
- ,("musician", "mus")
- ,("narrator", "nrt")
- ,("onscreen presenter", "osp")
- ,("opponent", "opn")
- ,("organizer of meeting", "orm")
- ,("originator", "org")
- ,("other", "oth")
- ,("owner", "own")
- ,("panelist", "pan")
- ,("papermaker", "ppm")
- ,("patent applicant", "pta")
- ,("patent holder", "pth")
- ,("patron", "pat")
- ,("performer", "prf")
- ,("permitting agency", "pma")
- ,("photographer", "pht")
- ,("plaintiff", "ptf")
- ,("plaintiff-appellant", "ptt")
- ,("plaintiff-appellee", "pte")
- ,("platemaker", "plt")
- ,("praeses", "pra")
- ,("presenter", "pre")
- ,("printer", "prt")
- ,("printer of plates", "pop")
- ,("printmaker", "prm")
- ,("process contact", "prc")
- ,("producer", "pro")
- ,("production company", "prn")
- ,("production designer", "prs")
- ,("production manager", "pmn")
- ,("production personnel", "prd")
- ,("production place", "prp")
- ,("programmer", "prg")
- ,("project director", "pdr")
- ,("proofreader", "pfr")
- ,("provider", "prv")
- ,("publication place", "pup")
- ,("publisher", "pbl")
- ,("publishing director", "pbd")
- ,("puppeteer", "ppt")
- ,("radio director", "rdd")
- ,("radio producer", "rpc")
- ,("recording engineer", "rce")
- ,("recordist", "rcd")
- ,("redaktor", "red")
- ,("renderer", "ren")
- ,("reporter", "rpt")
- ,("repository", "rps")
- ,("research team head", "rth")
- ,("research team member", "rtm")
- ,("researcher", "res")
- ,("respondent", "rsp")
- ,("respondent-appellant", "rst")
- ,("respondent-appellee", "rse")
- ,("responsible party", "rpy")
- ,("restager", "rsg")
- ,("restorationist", "rsr")
- ,("reviewer", "rev")
- ,("rubricator", "rbr")
- ,("scenarist", "sce")
- ,("scientific advisor", "sad")
- ,("screenwriter", "aus")
- ,("scribe", "scr")
- ,("sculptor", "scl")
- ,("second party", "spy")
- ,("secretary", "sec")
- ,("seller", "sll")
- ,("set designer", "std")
- ,("setting", "stg")
- ,("signer", "sgn")
- ,("singer", "sng")
- ,("sound designer", "sds")
- ,("speaker", "spk")
- ,("sponsor", "spn")
- ,("stage director", "sgd")
- ,("stage manager", "stm")
- ,("standards body", "stn")
- ,("stereotyper", "str")
- ,("storyteller", "stl")
- ,("supporting host", "sht")
- ,("surveyor", "srv")
- ,("teacher", "tch")
- ,("technical director", "tcd")
- ,("television director", "tld")
- ,("television producer", "tlp")
- ,("thesis advisor", "ths")
- ,("transcriber", "trc")
- ,("translator", "trl")
- ,("type designer", "tyd")
- ,("typographer", "tyg")
- ,("university place", "uvp")
- ,("videographer", "vdg")
- ,("witness", "wit")
- ,("wood engraver", "wde")
- ,("woodcutter", "wdc")
- ,("writer of accompanying material", "wam")
- ,("writer of added commentary", "wac")
- ,("writer of added lyrics", "wal")
- ,("writer of added text", "wat")
- ]
-
-docTitle' :: Meta -> [Inline]
-docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta
- where go (MetaString s) = [Str s]
- go (MetaInlines xs) = xs
- go (MetaBlocks [Para xs]) = xs
- go (MetaBlocks [Plain xs]) = xs
- go (MetaMap m) =
- case M.lookup "type" m of
- Just x | stringify x == "main" ->
- maybe [] go $ M.lookup "text" m
- _ -> []
- go (MetaList xs) = concatMap go xs
- go _ = []
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
deleted file mode 100644
index 967fe6a4c..000000000
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ /dev/null
@@ -1,617 +0,0 @@
-{-# LANGUAGE PatternGuards #-}
-
-{-
-Copyright (c) 2011-2012, Sergey Astanin
-All rights reserved.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format.
-
-FictionBook is an XML-based e-book format. For more information see:
-<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1>
-
--}
-module Text.Pandoc.Writers.FB2 (writeFB2) where
-
-import Control.Monad.State (StateT, evalStateT, get, modify, lift)
-import Control.Monad.State (liftM)
-import Data.ByteString.Base64 (encode)
-import Data.Char (toLower, isSpace, isAscii, isControl)
-import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix)
-import Data.Either (lefts, rights)
-import Network.HTTP (urlEncode)
-import Network.URI (isURI)
-import Text.XML.Light
-import qualified Text.XML.Light as X
-import qualified Text.XML.Light.Cursor as XC
-import qualified Data.ByteString.Char8 as B8
-import Control.Monad.Except (throwError, catchError)
-
-import Text.Pandoc.Logging
-import Text.Pandoc.Definition
-import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize,
- linesToPara)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import qualified Text.Pandoc.Class as P
-
--- | Data to be written at the end of the document:
--- (foot)notes, URLs, references, images.
-data FbRenderState = FbRenderState
- { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text
- , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path
- , parentListMarker :: String -- ^ list marker of the parent ordered list
- , parentBulletLevel :: Int -- ^ nesting level of the unordered list
- , writerOptions :: WriterOptions
- } deriving (Show)
-
--- | FictionBook building monad.
-type FBM m = StateT FbRenderState m
-
-newFB :: FbRenderState
-newFB = FbRenderState { footnotes = [], imagesToFetch = []
- , parentListMarker = "", parentBulletLevel = 0
- , writerOptions = def }
-
-data ImageMode = NormalImage | InlineImage deriving (Eq)
-instance Show ImageMode where
- show NormalImage = "imageType"
- show InlineImage = "inlineImageType"
-
--- | Produce an FB2 document from a 'Pandoc' document.
-writeFB2 :: PandocMonad m
- => WriterOptions -- ^ conversion options
- -> Pandoc -- ^ document to convert
- -> m String -- ^ FictionBook2 document (not encoded yet)
-writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc
-
-pandocToFB2 :: PandocMonad m
- => WriterOptions
- -> Pandoc
- -> FBM m String
-pandocToFB2 opts (Pandoc meta blocks) = do
- modify (\s -> s { writerOptions = opts })
- desc <- description meta
- fp <- frontpage meta
- secs <- renderSections 1 blocks
- let body = el "body" $ fp ++ secs
- notes <- renderFootnotes
- (imgs,missing) <- liftM imagesToFetch get >>= \s -> lift (fetchImages s)
- let body' = replaceImagesWithAlt missing body
- let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
- return $ xml_head ++ (showContent fb2_xml) ++ "\n"
- where
- xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
- fb2_attrs =
- let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0"
- xlink = "http://www.w3.org/1999/xlink"
- in [ uattr "xmlns" xmlns
- , attr ("xmlns", "l") xlink ]
-
-frontpage :: PandocMonad m => Meta -> FBM m [Content]
-frontpage meta' = do
- t <- cMapM toXml . docTitle $ meta'
- return $
- [ el "title" (el "p" t)
- , el "annotation" (map (el "p" . cMap plain)
- (docAuthors meta' ++ [docDate meta']))
- ]
-
-description :: PandocMonad m => Meta -> FBM m Content
-description meta' = do
- bt <- booktitle meta'
- let as = authors meta'
- dd <- docdate meta'
- return $ el "description"
- [ el "title-info" (bt ++ as ++ dd)
- , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version
- ]
-
-booktitle :: PandocMonad m => Meta -> FBM m [Content]
-booktitle meta' = do
- t <- cMapM toXml . docTitle $ meta'
- return $ if null t
- then []
- else [ el "book-title" t ]
-
-authors :: Meta -> [Content]
-authors meta' = cMap author (docAuthors meta')
-
-author :: [Inline] -> [Content]
-author ss =
- let ws = words . cMap plain $ ss
- email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws)
- ws' = filter ('@' `notElem`) ws
- names = case ws' of
- (nickname:[]) -> [ el "nickname" nickname ]
- (fname:lname:[]) -> [ el "first-name" fname
- , el "last-name" lname ]
- (fname:rest) -> [ el "first-name" fname
- , el "middle-name" (concat . init $ rest)
- , el "last-name" (last rest) ]
- ([]) -> []
- in list $ el "author" (names ++ email)
-
-docdate :: PandocMonad m => Meta -> FBM m [Content]
-docdate meta' = do
- let ss = docDate meta'
- d <- cMapM toXml ss
- return $ if null d
- then []
- else [el "date" d]
-
--- | Divide the stream of blocks into sections and convert to XML
--- representation.
-renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content]
-renderSections level blocks = do
- let secs = splitSections level blocks
- mapM (renderSection level) secs
-
-renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content
-renderSection level (ttl, body) = do
- title <- if null ttl
- then return []
- else return . list . el "title" . formatTitle $ ttl
- content <- if (hasSubsections body)
- then renderSections (level + 1) body
- else cMapM blockToXml body
- return $ el "section" (title ++ content)
- where
- hasSubsections = any isHeaderBlock
-
--- | Only <p> and <empty-line> are allowed within <title> in FB2.
-formatTitle :: [Inline] -> [Content]
-formatTitle inlines =
- let lns = split isLineBreak inlines
- lns' = map (el "p" . cMap plain) lns
- in intersperse (el "empty-line" ()) lns'
-
-split :: (a -> Bool) -> [a] -> [[a]]
-split _ [] = []
-split cond xs = let (b,a) = break cond xs
- in (b:split cond (drop 1 a))
-
-isLineBreak :: Inline -> Bool
-isLineBreak LineBreak = True
-isLineBreak _ = False
-
--- | Divide the stream of block elements into sections: [(title, blocks)].
-splitSections :: Int -> [Block] -> [([Inline], [Block])]
-splitSections level blocks = reverse $ revSplit (reverse blocks)
- where
- revSplit [] = []
- revSplit rblocks =
- let (lastsec, before) = break sameLevel rblocks
- (header, prevblocks) =
- case before of
- ((Header n _ title):prevblocks') ->
- if n == level
- then (title, prevblocks')
- else ([], before)
- _ -> ([], before)
- in (header, reverse lastsec) : revSplit prevblocks
- sameLevel (Header n _ _) = n == level
- sameLevel _ = False
-
--- | Make another FictionBook body with footnotes.
-renderFootnotes :: PandocMonad m => FBM m [Content]
-renderFootnotes = do
- fns <- footnotes `liftM` get
- if null fns
- then return [] -- no footnotes
- else return . list $
- el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
- where
- renderFN (n, idstr, cs) =
- let fn_texts = (el "title" (el "p" (show n))) : cs
- in el "section" ([uattr "id" idstr], fn_texts)
-
--- | Fetch images and encode them for the FictionBook XML.
--- Return image data and a list of hrefs of the missing images.
-fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String])
-fetchImages links = do
- imgs <- mapM (uncurry fetchImage) links
- return $ (rights imgs, lefts imgs)
-
--- | Fetch image data from disk or from network and make a <binary> XML section.
--- Return either (Left hrefOfMissingImage) or (Right xmlContent).
-fetchImage :: PandocMonad m => String -> String -> m (Either String Content)
-fetchImage href link = do
- mbimg <-
- case (isURI link, readDataURI link) of
- (True, Just (mime,_,True,base64)) ->
- let mime' = map toLower mime
- in if mime' == "image/png" || mime' == "image/jpeg"
- then return (Just (mime',base64))
- else return Nothing
- (True, Just _) -> return Nothing -- not base64-encoded
- _ -> do
- catchError (do (bs, mbmime) <- P.fetchItem Nothing link
- case mbmime of
- Nothing -> do
- report $ CouldNotDetermineMimeType link
- return Nothing
- Just mime -> return $ Just (mime,
- B8.unpack $ encode bs))
- (\e ->
- do report $ CouldNotFetchResource link (show e)
- return Nothing)
- case mbimg of
- Just (imgtype, imgdata) -> do
- return . Right $ el "binary"
- ( [uattr "id" href
- , uattr "content-type" imgtype]
- , txt imgdata )
- _ -> return (Left ('#':href))
-
-
--- | Extract mime type and encoded data from the Data URI.
-readDataURI :: String -- ^ URI
- -> Maybe (String,String,Bool,String)
- -- ^ Maybe (mime,charset,isBase64,data)
-readDataURI uri =
- case stripPrefix "data:" uri of
- Nothing -> Nothing
- Just rest ->
- let meta = takeWhile (/= ',') rest -- without trailing ','
- uridata = drop (length meta + 1) rest
- parts = split (== ';') meta
- (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
- in Just (mime,cs,enc,uridata)
-
- where
- upd str m@(mime,cs,enc)
- | isMimeType str = (str,cs,enc)
- | Just str' <- stripPrefix "charset=" str = (mime,str',enc)
- | str == "base64" = (mime,cs,True)
- | otherwise = m
-
--- Without parameters like ;charset=...; see RFC 2045, 5.1
-isMimeType :: String -> Bool
-isMimeType s =
- case split (=='/') s of
- [mtype,msubtype] ->
- ((map toLower mtype) `elem` types
- || "x-" `isPrefixOf` (map toLower mtype))
- && all valid mtype
- && all valid msubtype
- _ -> False
- where
- types = ["text","image","audio","video","application","message","multipart"]
- valid c = isAscii c && not (isControl c) && not (isSpace c) &&
- c `notElem` "()<>@,;:\\\"/[]?="
-
-footnoteID :: Int -> String
-footnoteID i = "n" ++ (show i)
-
-linkID :: Int -> String
-linkID i = "l" ++ (show i)
-
--- | Convert a block-level Pandoc's element to FictionBook XML representation.
-blockToXml :: PandocMonad m => Block -> FBM m [Content]
-blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2
-blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
--- title beginning with fig: indicates that the image is a figure
-blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) =
- insertImage NormalImage (Image atr alt (src,tit))
-blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss
-blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
- map (el "p" . el "code") . lines $ s
-blockToXml b@(RawBlock _ _) = do
- report $ BlockNotRendered b
- return []
-blockToXml (Div _ bs) = cMapM blockToXml bs
-blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
-blockToXml (LineBlock lns) = blockToXml $ linesToPara lns
-blockToXml (OrderedList a bss) = do
- state <- get
- let pmrk = parentListMarker state
- let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a
- let mkitem mrk bs = do
- modify (\s -> s { parentListMarker = mrk })
- itemtext <- cMapM blockToXml . paraToPlain $ bs
- modify (\s -> s { parentListMarker = pmrk }) -- old parent marker
- return . el "p" $ [ txt mrk, txt " " ] ++ itemtext
- mapM (uncurry mkitem) (zip markers bss)
-blockToXml (BulletList bss) = do
- state <- get
- let level = parentBulletLevel state
- let pmrk = parentListMarker state
- let prefix = replicate (length pmrk) ' '
- let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"]
- let mrk = prefix ++ bullets !! (level `mod` (length bullets))
- let mkitem bs = do
- modify (\s -> s { parentBulletLevel = (level+1) })
- itemtext <- cMapM blockToXml . paraToPlain $ bs
- modify (\s -> s { parentBulletLevel = level }) -- restore bullet level
- return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext
- mapM mkitem bss
-blockToXml (DefinitionList defs) =
- cMapM mkdef defs
- where
- mkdef (term, bss) = do
- def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss
- t <- wrap "strong" term
- return [ el "p" t, el "p" def' ]
- sep blocks =
- if all needsBreak blocks then
- blocks ++ [Plain [LineBreak]]
- else
- blocks
- needsBreak (Para _) = False
- needsBreak (Plain ins) = LineBreak `notElem` ins
- needsBreak _ = True
-blockToXml (Header _ _ _) = -- should never happen, see renderSections
- throwError $ PandocShouldNeverHappenError "unexpected header in section text"
-blockToXml HorizontalRule = return
- [ el "empty-line" ()
- , el "p" (txt (replicate 10 '—'))
- , el "empty-line" () ]
-blockToXml (Table caption aligns _ headers rows) = do
- hd <- mkrow "th" headers aligns
- bd <- mapM (\r -> mkrow "td" r aligns) rows
- c <- return . el "emphasis" =<< cMapM toXml caption
- return [el "table" (hd : bd), el "p" c]
- where
- mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content
- mkrow tag cells aligns' =
- (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns'))
- --
- mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content
- mkcell tag (cell, align) = do
- cblocks <- cMapM blockToXml cell
- return $ el tag ([align_attr align], cblocks)
- --
- align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
- align_str AlignLeft = "left"
- align_str AlignCenter = "center"
- align_str AlignRight = "right"
- align_str AlignDefault = "left"
-blockToXml Null = return []
-
--- Replace paragraphs with plain text and line break.
--- Necessary to simulate multi-paragraph lists in FB2.
-paraToPlain :: [Block] -> [Block]
-paraToPlain [] = []
-paraToPlain (Para inlines : rest) =
- let p = (Plain (inlines ++ [LineBreak]))
- in p : paraToPlain rest
-paraToPlain (p:rest) = p : paraToPlain rest
-
--- Simulate increased indentation level. Will not really work
--- for multi-line paragraphs.
-indent :: Block -> Block
-indent = indentBlock
- where
- -- indentation space
- spacer :: String
- spacer = replicate 4 ' '
- --
- indentBlock (Plain ins) = Plain ((Str spacer):ins)
- indentBlock (Para ins) = Para ((Str spacer):ins)
- indentBlock (CodeBlock a s) =
- let s' = unlines . map (spacer++) . lines $ s
- in CodeBlock a s'
- indentBlock (BlockQuote bs) = BlockQuote (map indent bs)
- indentBlock (Header l attr' ins) = Header l attr' (indentLines ins)
- indentBlock everythingElse = everythingElse
- -- indent every (explicit) line
- indentLines :: [Inline] -> [Inline]
- indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
- in intercalate [LineBreak] $ map ((Str spacer):) lns
-
--- | Convert a Pandoc's Inline element to FictionBook XML representation.
-toXml :: PandocMonad m => Inline -> FBM m [Content]
-toXml (Str s) = return [txt s]
-toXml (Span _ ils) = cMapM toXml ils
-toXml (Emph ss) = list `liftM` wrap "emphasis" ss
-toXml (Strong ss) = list `liftM` wrap "strong" ss
-toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
-toXml (Superscript ss) = list `liftM` wrap "sup" ss
-toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
-toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
- inner <- cMapM toXml ss
- return $ [txt "‘"] ++ inner ++ [txt "’"]
-toXml (Quoted DoubleQuote ss) = do
- inner <- cMapM toXml ss
- return $ [txt "“"] ++ inner ++ [txt "”"]
-toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles
-toXml (Code _ s) = return [el "code" s]
-toXml Space = return [txt " "]
-toXml SoftBreak = return [txt " "]
-toXml LineBreak = return [el "empty-line" ()]
-toXml (Math _ formula) = insertMath InlineImage formula
-toXml il@(RawInline _ _) = do
- report $ InlineNotRendered il
- return [] -- raw TeX and raw HTML are suppressed
-toXml (Link _ text (url,ttl)) = do
- fns <- footnotes `liftM` get
- let n = 1 + length fns
- let ln_id = linkID n
- let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]"
- ln_text <- cMapM toXml text
- let ln_desc =
- let ttl' = dropWhile isSpace ttl
- in if null ttl'
- then list . el "p" $ el "code" url
- else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ]
- modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns })
- return $ ln_text ++
- [ el "a"
- ( [ attr ("l","href") ('#':ln_id)
- , uattr "type" "note" ]
- , ln_ref) ]
-toXml img@(Image _ _ _) = insertImage InlineImage img
-toXml (Note bs) = do
- fns <- footnotes `liftM` get
- let n = 1 + length fns
- let fn_id = footnoteID n
- fn_desc <- cMapM blockToXml bs
- modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns })
- let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]"
- return . list $ el "a" ( [ attr ("l","href") ('#':fn_id)
- , uattr "type" "note" ]
- , fn_ref )
-
-insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content]
-insertMath immode formula = do
- htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get
- case htmlMath of
- WebTeX url -> do
- let alt = [Code nullAttr formula]
- let imgurl = url ++ urlEncode formula
- let img = Image nullAttr alt (imgurl, "")
- insertImage immode img
- _ -> return [el "code" formula]
-
-insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
-insertImage immode (Image _ alt (url,ttl)) = do
- images <- imagesToFetch `liftM` get
- let n = 1 + length images
- let fname = "image" ++ show n
- modify (\s -> s { imagesToFetch = (fname, url) : images })
- let ttlattr = case (immode, null ttl) of
- (NormalImage, False) -> [ uattr "title" ttl ]
- _ -> []
- return . list $
- el "image" $
- [ attr ("l","href") ('#':fname)
- , attr ("l","type") (show immode)
- , uattr "alt" (cMap plain alt) ]
- ++ ttlattr
-insertImage _ _ = error "unexpected inline instead of image"
-
-replaceImagesWithAlt :: [String] -> Content -> Content
-replaceImagesWithAlt missingHrefs body =
- let cur = XC.fromContent body
- cur' = replaceAll cur
- in XC.toTree . XC.root $ cur'
- where
- --
- replaceAll :: XC.Cursor -> XC.Cursor
- replaceAll c =
- let n = XC.current c
- c' = if isImage n && isMissing n
- then XC.modifyContent replaceNode c
- else c
- in case XC.nextDF c' of
- (Just cnext) -> replaceAll cnext
- Nothing -> c' -- end of document
- --
- isImage :: Content -> Bool
- isImage (Elem e) = (elName e) == (uname "image")
- isImage _ = False
- --
- isMissing (Elem img@(Element _ _ _ _)) =
- let imgAttrs = elAttribs img
- badAttrs = map (attr ("l","href")) missingHrefs
- in any (`elem` imgAttrs) badAttrs
- isMissing _ = False
- --
- replaceNode :: Content -> Content
- replaceNode n@(Elem img@(Element _ _ _ _)) =
- let attrs = elAttribs img
- alt = getAttrVal attrs (uname "alt")
- imtype = getAttrVal attrs (qname "l" "type")
- in case (alt, imtype) of
- (Just alt', Just imtype') ->
- if imtype' == show NormalImage
- then el "p" alt'
- else txt alt'
- (Just alt', Nothing) -> txt alt' -- no type attribute
- _ -> n -- don't replace if alt text is not found
- replaceNode n = n
- --
- getAttrVal :: [X.Attr] -> QName -> Maybe String
- getAttrVal attrs name =
- case filter ((name ==) . attrKey) attrs of
- (a:_) -> Just (attrVal a)
- _ -> Nothing
-
-
--- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
-wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
-
--- " Create a singleton list.
-list :: a -> [a]
-list = (:[])
-
--- | Convert an 'Inline' to plaintext.
-plain :: Inline -> String
-plain (Str s) = s
-plain (Emph ss) = concat (map plain ss)
-plain (Span _ ss) = concat (map plain ss)
-plain (Strong ss) = concat (map plain ss)
-plain (Strikeout ss) = concat (map plain ss)
-plain (Superscript ss) = concat (map plain ss)
-plain (Subscript ss) = concat (map plain ss)
-plain (SmallCaps ss) = concat (map plain ss)
-plain (Quoted _ ss) = concat (map plain ss)
-plain (Cite _ ss) = concat (map plain ss) -- FIXME
-plain (Code _ s) = s
-plain Space = " "
-plain SoftBreak = " "
-plain LineBreak = "\n"
-plain (Math _ s) = s
-plain (RawInline _ _) = ""
-plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"])
-plain (Image _ alt _) = concat (map plain alt)
-plain (Note _) = "" -- FIXME
-
--- | Create an XML element.
-el :: (Node t)
- => String -- ^ unqualified element name
- -> t -- ^ node contents
- -> Content -- ^ XML content
-el name cs = Elem $ unode name cs
-
--- | Put empty lines around content
-spaceBeforeAfter :: [Content] -> [Content]
-spaceBeforeAfter cs =
- let emptyline = el "empty-line" ()
- in [emptyline] ++ cs ++ [emptyline]
-
--- | Create a plain-text XML content.
-txt :: String -> Content
-txt s = Text $ CData CDataText s Nothing
-
--- | Create an XML attribute with an unqualified name.
-uattr :: String -> String -> Text.XML.Light.Attr
-uattr name val = Attr (uname name) val
-
--- | Create an XML attribute with a qualified name from given namespace.
-attr :: (String, String) -> String -> Text.XML.Light.Attr
-attr (ns, name) val = Attr (qname ns name) val
-
--- | Unqualified name
-uname :: String -> QName
-uname name = QName name Nothing Nothing
-
--- | Qualified name
-qname :: String -> String -> QName
-qname ns name = QName name Nothing (Just ns)
-
--- | Abbreviation for 'concatMap'.
-cMap :: (a -> [b]) -> [a] -> [b]
-cMap = concatMap
-
--- | Monadic equivalent of 'concatMap'.
-cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
-cMapM f xs = concat `liftM` mapM f xs
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
deleted file mode 100644
index 99f8c5b42..000000000
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ /dev/null
@@ -1,1069 +0,0 @@
-{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.HTML
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to HTML.
--}
-module Text.Pandoc.Writers.HTML (
- writeHtml4,
- writeHtml4String,
- writeHtml5,
- writeHtml5String,
- writeHtmlStringForEPUB,
- writeS5,
- writeSlidy,
- writeSlideous,
- writeDZSlides,
- writeRevealJs
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Data.Monoid ((<>))
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates
-import Text.Pandoc.Writers.Math
-import Text.Pandoc.Slides
-import Text.Pandoc.Highlighting ( highlight, styleToCss,
- formatHtmlInline, formatHtmlBlock )
-import Text.Pandoc.XML (fromEntities, escapeStringForXML)
-import Network.URI ( parseURIReference, URI(..), unEscapeString )
-import Network.HTTP ( urlEncode )
-import Numeric ( showHex )
-import Data.Char ( ord, toLower )
-import Data.List ( isPrefixOf, intersperse )
-import Data.String ( fromString )
-import Data.Maybe ( catMaybes, fromMaybe, isJust )
-import Control.Monad.State
-import Text.Blaze.Html hiding(contents)
-#if MIN_VERSION_blaze_markup(0,6,3)
-#else
-import Text.Blaze.Internal(preEscapedString)
-#endif
-#if MIN_VERSION_blaze_html(0,5,1)
-import qualified Text.Blaze.XHtml5 as H5
-#else
-import qualified Text.Blaze.Html5 as H5
-#endif
-import qualified Text.Blaze.XHtml1.Transitional as H
-import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
-import Text.Blaze.Html.Renderer.String (renderHtml)
-import Text.TeXMath
-import Text.XML.Light.Output
-import Text.XML.Light (unode, elChildren, unqual)
-import qualified Text.XML.Light as XML
-import System.FilePath (takeExtension)
-import Data.Aeson (Value)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
-data WriterState = WriterState
- { stNotes :: [Html] -- ^ List of notes
- , stMath :: Bool -- ^ Math is used in document
- , stQuotes :: Bool -- ^ <q> tag is used
- , stHighlighting :: Bool -- ^ Syntax highlighting is used
- , stSecNum :: [Int] -- ^ Number of current section
- , stElement :: Bool -- ^ Processing an Element
- , stHtml5 :: Bool -- ^ Use HTML5
- , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub
- , stSlideVariant :: HTMLSlideVariant
- }
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
- stHighlighting = False, stSecNum = [],
- stElement = False, stHtml5 = False,
- stEPUBVersion = Nothing,
- stSlideVariant = NoSlides}
-
--- Helpers to render HTML with the appropriate function.
-
-strToHtml :: String -> Html
-strToHtml ('\'':xs) = preEscapedString "\'" `mappend` strToHtml xs
-strToHtml xs@(_:_) = case break (=='\'') xs of
- (_ ,[]) -> toHtml xs
- (ys,zs) -> toHtml ys `mappend` strToHtml zs
-strToHtml [] = ""
-
--- | Hard linebreak.
-nl :: WriterOptions -> Html
-nl opts = if writerWrapText opts == WrapNone
- then mempty
- else preEscapedString "\n"
-
--- | Convert Pandoc document to Html 5 string.
-writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeHtml5String = writeHtmlString'
- defaultWriterState{ stHtml5 = True }
-
--- | Convert Pandoc document to Html 5 structure.
-writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
-writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True }
-
--- | Convert Pandoc document to Html 4 string.
-writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeHtml4String = writeHtmlString'
- defaultWriterState{ stHtml5 = False }
-
--- | Convert Pandoc document to Html 4 structure.
-writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
-writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False }
-
--- | Convert Pandoc document to Html appropriate for an epub version.
-writeHtmlStringForEPUB :: PandocMonad m
- => EPUBVersion -> WriterOptions -> Pandoc -> m String
-writeHtmlStringForEPUB version = writeHtmlString'
- defaultWriterState{ stHtml5 = version == EPUB3,
- stEPUBVersion = Just version }
-
--- | Convert Pandoc document to Reveal JS HTML slide show.
-writeRevealJs :: PandocMonad m
- => WriterOptions -> Pandoc -> m String
-writeRevealJs = writeHtmlSlideShow' RevealJsSlides
-
--- | Convert Pandoc document to S5 HTML slide show.
-writeS5 :: PandocMonad m
- => WriterOptions -> Pandoc -> m String
-writeS5 = writeHtmlSlideShow' S5Slides
-
--- | Convert Pandoc document to Slidy HTML slide show.
-writeSlidy :: PandocMonad m
- => WriterOptions -> Pandoc -> m String
-writeSlidy = writeHtmlSlideShow' SlidySlides
-
--- | Convert Pandoc document to Slideous HTML slide show.
-writeSlideous :: PandocMonad m
- => WriterOptions -> Pandoc -> m String
-writeSlideous = writeHtmlSlideShow' SlideousSlides
-
--- | Convert Pandoc document to DZSlides HTML slide show.
-writeDZSlides :: PandocMonad m
- => WriterOptions -> Pandoc -> m String
-writeDZSlides = writeHtmlSlideShow' DZSlides
-
-writeHtmlSlideShow' :: PandocMonad m
- => HTMLSlideVariant -> WriterOptions -> Pandoc -> m String
-writeHtmlSlideShow' variant = writeHtmlString'
- defaultWriterState{ stSlideVariant = variant
- , stHtml5 = case variant of
- RevealJsSlides -> True
- S5Slides -> False
- SlidySlides -> False
- DZSlides -> True
- SlideousSlides -> False
- NoSlides -> False
- }
-
-writeHtmlString' :: PandocMonad m
- => WriterState -> WriterOptions -> Pandoc -> m String
-writeHtmlString' st opts d = do
- (body, context) <- evalStateT (pandocToHtml opts d) st
- return $ case writerTemplate opts of
- Nothing -> renderHtml body
- Just tpl -> renderTemplate' tpl $
- defField "body" (renderHtml body) context
-
-writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
-writeHtml' st opts d = do
- (body, context) <- evalStateT (pandocToHtml opts d) st
- return $ case writerTemplate opts of
- Nothing -> body
- Just tpl -> renderTemplate' tpl $
- defField "body" (renderHtml body) context
-
--- result is (title, authors, date, toc, body, new variables)
-pandocToHtml :: PandocMonad m
- => WriterOptions
- -> Pandoc
- -> StateT WriterState m (Html, Value)
-pandocToHtml opts (Pandoc meta blocks) = do
- metadata <- metaToJSON opts
- (fmap renderHtml . blockListToHtml opts)
- (fmap renderHtml . inlineListToHtml opts)
- meta
- let stringifyHTML = escapeStringForXML . stringify
- let authsMeta = map stringifyHTML $ docAuthors meta
- let dateMeta = stringifyHTML $ docDate meta
- let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
- slideVariant <- gets stSlideVariant
- let sects = hierarchicalize $
- if slideVariant == NoSlides
- then blocks
- else prepSlides slideLevel blocks
- toc <- if writerTableOfContents opts && slideVariant /= S5Slides
- then tableOfContents opts sects
- else return Nothing
- blocks' <- liftM (mconcat . intersperse (nl opts)) $
- mapM (elementToHtml slideLevel opts) sects
- st <- get
- notes <- footnoteSection opts (reverse (stNotes st))
- let thebody = blocks' >> notes
- let math = case writerHTMLMathMethod opts of
- LaTeXMathML (Just url) ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ mempty
- MathJax url ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ case slideVariant of
- SlideousSlides ->
- preEscapedString
- "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
- _ -> mempty
- JsMath (Just url) ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ mempty
- KaTeX js css ->
- (H.script ! A.src (toValue js) $ mempty) <>
- (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
- (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
- _ -> case lookup "mathml-script" (writerVariables opts) of
- Just s | not (stHtml5 st) ->
- H.script ! A.type_ "text/javascript"
- $ preEscapedString
- ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
- | otherwise -> mempty
- Nothing -> mempty
- let context = (if stHighlighting st
- then case writerHighlightStyle opts of
- Just sty -> defField "highlighting-css"
- (styleToCss sty)
- Nothing -> id
- else id) $
- (if stMath st
- then defField "math" (renderHtml math)
- else id) $
- defField "quotes" (stQuotes st) $
- maybe id (defField "toc" . renderHtml) toc $
- defField "author-meta" authsMeta $
- maybe id (defField "date-meta") (normalizeDate dateMeta) $
- defField "pagetitle" (stringifyHTML $ docTitle meta) $
- defField "idprefix" (writerIdentifierPrefix opts) $
- -- these should maybe be set in pandoc.hs
- defField "slidy-url"
- ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $
- defField "slideous-url" ("slideous" :: String) $
- defField "revealjs-url" ("reveal.js" :: String) $
- defField "s5-url" ("s5/default" :: String) $
- defField "html5" (stHtml5 st) $
- metadata
- return (thebody, context)
-
--- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix
-prefixedId :: WriterOptions -> String -> Attribute
-prefixedId opts s =
- case s of
- "" -> mempty
- _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s
-
-toList :: PandocMonad m
- => (Html -> Html)
- -> WriterOptions
- -> [Html]
- -> StateT WriterState m Html
-toList listop opts items = do
- slideVariant <- gets stSlideVariant
- return $
- if (writerIncremental opts)
- then if (slideVariant /= RevealJsSlides)
- then (listop $ mconcat items) ! A.class_ "incremental"
- else listop $ mconcat $ map (! A.class_ "fragment") items
- else listop $ mconcat items
-
-unordList :: PandocMonad m
- => WriterOptions -> [Html] -> StateT WriterState m Html
-unordList opts = toList H.ul opts . toListItems opts
-
-ordList :: PandocMonad m
- => WriterOptions -> [Html] -> StateT WriterState m Html
-ordList opts = toList H.ol opts . toListItems opts
-
-defList :: PandocMonad m
- => WriterOptions -> [Html] -> StateT WriterState m Html
-defList opts items = toList H.dl opts (items ++ [nl opts])
-
--- | Construct table of contents from list of elements.
-tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html)
-tableOfContents _ [] = return Nothing
-tableOfContents opts sects = do
- contents <- mapM (elementToListItem opts) sects
- let tocList = catMaybes contents
- if null tocList
- then return Nothing
- else Just <$> unordList opts tocList
-
--- | Convert section number to string
-showSecNum :: [Int] -> String
-showSecNum = concat . intersperse "." . map show
-
--- | Converts an Element to a list item for a table of contents,
--- retrieving the appropriate identifier from state.
-elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html)
--- Don't include the empty headers created in slide shows
--- shows when an hrule is used to separate slides without a new title:
-elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing
-elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs)
- | lev <= writerTOCDepth opts = do
- let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
- let sectnum = if writerNumberSections opts && not (null num) &&
- "unnumbered" `notElem` classes
- then (H.span ! A.class_ "toc-section-number"
- $ toHtml $ showSecNum num') >> preEscapedString " "
- else mempty
- txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText
- subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
- subList <- if null subHeads
- then return mempty
- else unordList opts subHeads
- -- in reveal.js, we need #/apples, not #apples:
- slideVariant <- gets stSlideVariant
- let revealSlash = ['/' | slideVariant== RevealJsSlides]
- return $ Just
- $ if null id'
- then (H.a $ toHtml txt) >> subList
- else (H.a ! A.href (toValue $ "#" ++ revealSlash ++
- writerIdentifierPrefix opts ++ id')
- $ toHtml txt) >> subList
-elementToListItem _ _ = return Nothing
-
--- | Convert an Element to Html.
-elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html
-elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block
-elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do
- slideVariant <- gets stSlideVariant
- let slide = slideVariant /= NoSlides && level <= slideLevel
- let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
- modify $ \st -> st{stSecNum = num'} -- update section number
- html5 <- gets stHtml5
- let titleSlide = slide && level < slideLevel
- header' <- if title' == [Str "\0"] -- marker for hrule
- then return mempty
- else do
- modify (\st -> st{ stElement = True})
- res <- blockToHtml opts
- (Header level (id',classes,keyvals) title')
- modify (\st -> st{ stElement = False})
- return res
-
- let isSec (Sec _ _ _ _ _) = True
- isSec (Blk _) = False
- let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
- isPause _ = False
- let fragmentClass = case slideVariant of
- RevealJsSlides -> "fragment"
- _ -> "incremental"
- let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\""
- ++ fragmentClass ++ "\">")) :
- (xs ++ [Blk (RawBlock (Format "html") "</div>")])
- innerContents <- mapM (elementToHtml slideLevel opts)
- $ if titleSlide
- -- title slides have no content of their own
- then filter isSec elements
- else case splitBy isPause elements of
- [] -> []
- (x:xs) -> x ++ concatMap inDiv xs
- let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
- let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
- ["section" | (slide || writerSectionDivs opts) &&
- not html5 ] ++
- ["level" ++ show level | slide || writerSectionDivs opts ]
- ++ classes
- let secttag = if html5
- then H5.section
- else H.div
- let attr = (id',classes',keyvals)
- return $ if titleSlide
- then (if slideVariant == RevealJsSlides
- then H5.section
- else id) $ mconcat $
- (addAttrs opts attr $ secttag $ header') : innerContents
- else if writerSectionDivs opts || slide
- then addAttrs opts attr
- $ secttag $ inNl $ header' : innerContents
- else mconcat $ intersperse (nl opts)
- $ addAttrs opts attr header' : innerContents
-
--- | Convert list of Note blocks to a footnote <div>.
--- Assumes notes are sorted.
-footnoteSection :: PandocMonad m
- => WriterOptions -> [Html] -> StateT WriterState m Html
-footnoteSection opts notes = do
- html5 <- gets stHtml5
- slideVariant <- gets stSlideVariant
- let hrtag = if html5 then H5.hr else H.hr
- let container x = if html5
- then H5.section ! A.class_ "footnotes" $ x
- else if slideVariant /= NoSlides
- then H.div ! A.class_ "footnotes slide" $ x
- else H.div ! A.class_ "footnotes" $ x
- return $
- if null notes
- then mempty
- else nl opts >> (container
- $ nl opts >> hrtag >> nl opts >>
- H.ol (mconcat notes >> nl opts) >> nl opts)
-
--- | Parse a mailto link; return Just (name, domain) or Nothing.
-parseMailto :: String -> Maybe (String, String)
-parseMailto s = do
- case break (==':') s of
- (xs,':':addr) | map toLower xs == "mailto" -> do
- let (name', rest) = span (/='@') addr
- let domain = drop 1 rest
- return (name', domain)
- _ -> fail "not a mailto: URL"
-
--- | Obfuscate a "mailto:" link.
-obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html
-obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation =
- return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt
-obfuscateLink opts attr (renderHtml -> txt) s =
- let meth = writerEmailObfuscation opts
- s' = map toLower (take 7 s) ++ drop 7 s
- in case parseMailto s' of
- (Just (name', domain)) ->
- let domain' = substitute "." " dot " domain
- at' = obfuscateChar '@'
- (linkText, altText) =
- if txt == drop 7 s' -- autolink
- then ("e", name' ++ " at " ++ domain')
- else ("'" ++ obfuscateString txt ++ "'",
- txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")")
- in case meth of
- ReferenceObfuscation ->
- -- need to use preEscapedString or &'s are escaped to &amp; in URL
- return $
- preEscapedString $ "<a href=\"" ++ (obfuscateString s')
- ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>"
- JavascriptObfuscation ->
- return $
- (H.script ! A.type_ "text/javascript" $
- preEscapedString ("\n<!--\nh='" ++
- obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
- obfuscateString name' ++ "';e=n+a+h;\n" ++
- "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++
- linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >>
- H.noscript (preEscapedString $ obfuscateString altText)
- _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth
- _ -> return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email
-
--- | Obfuscate character as entity.
-obfuscateChar :: Char -> String
-obfuscateChar char =
- let num = ord char
- numstr = if even num then show num else "x" ++ showHex num ""
- in "&#" ++ numstr ++ ";"
-
--- | Obfuscate string using entities.
-obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . fromEntities
-
-addAttrs :: WriterOptions -> Attr -> Html -> Html
-addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr)
-
-toAttrs :: [(String, String)] -> [Attribute]
-toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs
-
-attrsToHtml :: WriterOptions -> Attr -> [Attribute]
-attrsToHtml opts (id',classes',keyvals) =
- [prefixedId opts id' | not (null id')] ++
- [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals
-
-imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute]
-imgAttrsToHtml opts attr =
- attrsToHtml opts (ident,cls,kvs') ++
- toAttrs (dimensionsToAttrList opts attr)
- where
- (ident,cls,kvs) = attr
- kvs' = filter isNotDim kvs
- isNotDim ("width", _) = False
- isNotDim ("height", _) = False
- isNotDim _ = True
-
-dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)]
-dimensionsToAttrList opts attr = (go Width) ++ (go Height)
- where
- go dir = case (dimension dir attr) of
- (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))]
- (Just dim) -> [(show dir, showInPixel opts dim)]
- _ -> []
-
-
-imageExts :: [String]
-imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
- "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm",
- "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff",
- "wbmp", "xbm", "xpm", "xwd" ]
-
-treatAsImage :: FilePath -> Bool
-treatAsImage fp =
- let path = case uriPath `fmap` parseURIReference fp of
- Nothing -> fp
- Just up -> up
- ext = map toLower $ drop 1 $ takeExtension path
- in null ext || ext `elem` imageExts
-
--- | Convert Pandoc block element to HTML.
-blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
-blockToHtml _ Null = return mempty
-blockToHtml opts (Plain lst) = inlineListToHtml opts lst
--- title beginning with fig: indicates that the image is a figure
-blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do
- img <- inlineToHtml opts (Image attr txt (s,tit))
- html5 <- gets stHtml5
- let tocapt = if html5
- then H5.figcaption
- else H.p ! A.class_ "caption"
- capt <- if null txt
- then return mempty
- else tocapt `fmap` inlineListToHtml opts txt
- return $ if html5
- then H5.figure $ mconcat
- [nl opts, img, capt, nl opts]
- else H.div ! A.class_ "figure" $ mconcat
- [nl opts, img, nl opts, capt, nl opts]
-blockToHtml opts (Para lst)
- | isEmptyRaw lst = return mempty
- | otherwise = do
- contents <- inlineListToHtml opts lst
- return $ H.p contents
- where
- isEmptyRaw [RawInline f _] = f /= (Format "html")
- isEmptyRaw _ = False
-blockToHtml opts (LineBlock lns) =
- if writerWrapText opts == WrapNone
- then blockToHtml opts $ linesToPara lns
- else do
- let lf = preEscapedString "\n"
- htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns
- return $ H.div ! A.style "white-space: pre-line;" $ htmlLines
-blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do
- html5 <- gets stHtml5
- let speakerNotes = "notes" `elem` classes
- -- we don't want incremental output inside speaker notes, see #1394
- let opts' = if speakerNotes then opts{ writerIncremental = False } else opts
- contents <- blockListToHtml opts' bs
- let contents' = nl opts >> contents >> nl opts
- let (divtag, classes') = if html5 && "section" `elem` classes
- then (H5.section, filter (/= "section") classes)
- else (H.div, classes)
- slideVariant <- gets stSlideVariant
- return $
- if speakerNotes
- then case slideVariant of
- RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
- DZSlides -> (addAttrs opts' attr $ H5.div $ contents')
- ! (H5.customAttribute "role" "note")
- NoSlides -> addAttrs opts' attr $ H.div $ contents'
- _ -> mempty
- else addAttrs opts (ident, classes', kvs) $ divtag $ contents'
-blockToHtml opts (RawBlock f str)
- | f == Format "html" = return $ preEscapedString str
- | (f == Format "latex" || f == Format "tex") &&
- allowsMathEnvironments (writerHTMLMathMethod opts) &&
- isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str]
- | otherwise = do
- report $ BlockNotRendered (RawBlock f str)
- return mempty
-blockToHtml _ (HorizontalRule) = do
- html5 <- gets stHtml5
- return $ if html5 then H5.hr else H.hr
-blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
- let tolhs = isEnabled Ext_literate_haskell opts &&
- any (\c -> map toLower c == "haskell") classes &&
- any (\c -> map toLower c == "literate") classes
- classes' = if tolhs
- then map (\c -> if map toLower c == "haskell"
- then "literatehaskell"
- else c) classes
- else classes
- adjCode = if tolhs
- then unlines . map ("> " ++) . lines $ rawCode
- else rawCode
- hlCode = if isJust (writerHighlightStyle opts)
- then highlight formatHtmlBlock
- (id',classes',keyvals) adjCode
- else Nothing
- case hlCode of
- Nothing -> return $ addAttrs opts (id',classes,keyvals)
- $ H.pre $ H.code $ toHtml adjCode
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (addAttrs opts (id',[],keyvals) h)
-blockToHtml opts (BlockQuote blocks) = do
- -- in S5, treat list in blockquote specially
- -- if default is incremental, make it nonincremental;
- -- otherwise incremental
- slideVariant <- gets stSlideVariant
- if slideVariant /= NoSlides
- then let inc = not (writerIncremental opts) in
- case blocks of
- [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
- (BulletList lst)
- [OrderedList attribs lst] ->
- blockToHtml (opts {writerIncremental = inc})
- (OrderedList attribs lst)
- [DefinitionList lst] ->
- blockToHtml (opts {writerIncremental = inc})
- (DefinitionList lst)
- _ -> do contents <- blockListToHtml opts blocks
- return $ H.blockquote
- $ nl opts >> contents >> nl opts
- else do
- contents <- blockListToHtml opts blocks
- return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level attr@(_,classes,_) lst) = do
- contents <- inlineListToHtml opts lst
- secnum <- liftM stSecNum get
- let contents' = if writerNumberSections opts && not (null secnum)
- && "unnumbered" `notElem` classes
- then (H.span ! A.class_ "header-section-number" $ toHtml
- $ showSecNum secnum) >> strToHtml " " >> contents
- else contents
- inElement <- gets stElement
- return $ (if inElement then id else addAttrs opts attr)
- $ case level of
- 1 -> H.h1 contents'
- 2 -> H.h2 contents'
- 3 -> H.h3 contents'
- 4 -> H.h4 contents'
- 5 -> H.h5 contents'
- 6 -> H.h6 contents'
- _ -> H.p contents'
-blockToHtml opts (BulletList lst) = do
- contents <- mapM (blockListToHtml opts) lst
- unordList opts contents
-blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
- contents <- mapM (blockListToHtml opts) lst
- html5 <- gets stHtml5
- let numstyle' = case numstyle of
- Example -> "decimal"
- _ -> camelCaseToHyphenated $ show numstyle
- let attribs = (if startnum /= 1
- then [A.start $ toValue startnum]
- else []) ++
- (if numstyle == Example
- then [A.class_ "example"]
- else []) ++
- (if numstyle /= DefaultStyle
- then if html5
- then [A.type_ $
- case numstyle of
- Decimal -> "1"
- LowerAlpha -> "a"
- UpperAlpha -> "A"
- LowerRoman -> "i"
- UpperRoman -> "I"
- _ -> "1"]
- else [A.style $ toValue $ "list-style-type: " ++
- numstyle']
- else [])
- l <- ordList opts contents
- return $ foldl (!) l attribs
-blockToHtml opts (DefinitionList lst) = do
- contents <- mapM (\(term, defs) ->
- do term' <- if null term
- then return mempty
- else liftM H.dt $ inlineListToHtml opts term
- defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
- blockListToHtml opts) defs
- return $ mconcat $ nl opts : term' : nl opts :
- intersperse (nl opts) defs') lst
- defList opts contents
-blockToHtml opts (Table capt aligns widths headers rows') = do
- captionDoc <- if null capt
- then return mempty
- else do
- cs <- inlineListToHtml opts capt
- return $ H.caption cs >> nl opts
- html5 <- gets stHtml5
- let percent w = show (truncate (100*w) :: Integer) ++ "%"
- let coltags = if all (== 0.0) widths
- then mempty
- else do
- H.colgroup $ do
- nl opts
- mapM_ (\w -> do
- if html5
- then H.col ! A.style (toValue $ "width: " ++
- percent w)
- else H.col ! A.width (toValue $ percent w)
- nl opts) widths
- nl opts
- head' <- if all null headers
- then return mempty
- else do
- contents <- tableRowToHtml opts aligns 0 headers
- return $ H.thead (nl opts >> contents) >> nl opts
- body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $
- zipWithM (tableRowToHtml opts aligns) [1..] rows'
- let tbl = H.table $
- nl opts >> captionDoc >> coltags >> head' >> body' >> nl opts
- let totalWidth = sum widths
- -- When widths of columns are < 100%, we need to set width for the whole
- -- table, or some browsers give us skinny columns with lots of space between:
- return $ if totalWidth == 0 || totalWidth == 1
- then tbl
- else tbl ! A.style (toValue $ "width:" ++
- show (round (totalWidth * 100) :: Int) ++ "%;")
-
-tableRowToHtml :: PandocMonad m
- => WriterOptions
- -> [Alignment]
- -> Int
- -> [[Block]]
- -> StateT WriterState m Html
-tableRowToHtml opts aligns rownum cols' = do
- let mkcell = if rownum == 0 then H.th else H.td
- let rowclass = case rownum of
- 0 -> "header"
- x | x `rem` 2 == 1 -> "odd"
- _ -> "even"
- cols'' <- sequence $ zipWith
- (\alignment item -> tableItemToHtml opts mkcell alignment item)
- aligns cols'
- return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'')
- >> nl opts
-
-alignmentToString :: Alignment -> [Char]
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> ""
-
-tableItemToHtml :: PandocMonad m
- => WriterOptions
- -> (Html -> Html)
- -> Alignment
- -> [Block]
- -> StateT WriterState m Html
-tableItemToHtml opts tag' align' item = do
- contents <- blockListToHtml opts item
- html5 <- gets stHtml5
- let alignStr = alignmentToString align'
- let attribs = if html5
- then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
- else A.align (toValue alignStr)
- let tag'' = if null alignStr
- then tag'
- else tag' ! attribs
- return $ (tag'' $ contents) >> nl opts
-
-toListItems :: WriterOptions -> [Html] -> [Html]
-toListItems opts items = map (toListItem opts) items ++ [nl opts]
-
-toListItem :: WriterOptions -> Html -> Html
-toListItem opts item = nl opts >> H.li item
-
-blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html
-blockListToHtml opts lst =
- fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst
-
--- | Convert list of Pandoc inline elements to HTML.
-inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html
-inlineListToHtml opts lst =
- mapM (inlineToHtml opts) lst >>= return . mconcat
-
--- | Annotates a MathML expression with the tex source
-annotateMML :: XML.Element -> String -> XML.Element
-annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)])
- where
- cs = case elChildren e of
- [] -> unode "mrow" ()
- [x] -> x
- xs -> unode "mrow" xs
- math childs = XML.Element q as [XML.Elem childs] l
- where
- (XML.Element q as _ l) = e
- annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"]
-
-
--- | Convert Pandoc inline element to HTML.
-inlineToHtml :: PandocMonad m
- => WriterOptions -> Inline -> StateT WriterState m Html
-inlineToHtml opts inline = do
- html5 <- gets stHtml5
- case inline of
- (Str str) -> return $ strToHtml str
- (Space) -> return $ strToHtml " "
- (SoftBreak) -> return $ case writerWrapText opts of
- WrapNone -> preEscapedString " "
- WrapAuto -> preEscapedString " "
- WrapPreserve -> preEscapedString "\n"
- (LineBreak) -> return $ (if html5 then H5.br else H.br)
- <> strToHtml "\n"
- (Span (id',classes,kvs) ils)
- -> inlineListToHtml opts ils >>=
- return . addAttrs opts attr' . H.span
- where attr' = (id',classes',kvs')
- classes' = filter (`notElem` ["csl-no-emph",
- "csl-no-strong",
- "csl-no-smallcaps"]) classes
- kvs' = if null styles
- then kvs
- else (("style", concat styles) : kvs)
- styles = ["font-style:normal;"
- | "csl-no-emph" `elem` classes]
- ++ ["font-weight:normal;"
- | "csl-no-strong" `elem` classes]
- ++ ["font-variant:normal;"
- | "csl-no-smallcaps" `elem` classes]
- (Emph lst) -> inlineListToHtml opts lst >>= return . H.em
- (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong
- (Code attr str) -> case hlCode of
- Nothing -> return
- $ addAttrs opts attr
- $ H.code $ strToHtml str
- Just h -> do
- modify $ \st -> st{ stHighlighting = True }
- return $ addAttrs opts (id',[],keyvals) h
- where (id',_,keyvals) = attr
- hlCode = if isJust (writerHighlightStyle opts)
- then highlight formatHtmlInline
- attr str
- else Nothing
- (Strikeout lst) -> inlineListToHtml opts lst >>=
- return . H.del
- (SmallCaps lst) -> inlineListToHtml opts lst >>=
- return . (H.span ! A.style "font-variant: small-caps;")
- (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup
- (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub
- (Quoted quoteType lst) ->
- let (leftQuote, rightQuote) = case quoteType of
- SingleQuote -> (strToHtml "‘",
- strToHtml "’")
- DoubleQuote -> (strToHtml "“",
- strToHtml "”")
- in if writerHtmlQTags opts
- then do
- modify $ \st -> st{ stQuotes = True }
- H.q `fmap` inlineListToHtml opts lst
- else (\x -> leftQuote >> x >> rightQuote)
- `fmap` inlineListToHtml opts lst
- (Math t str) -> do
- modify (\st -> st {stMath = True})
- let mathClass = toValue $ ("math " :: String) ++
- if t == InlineMath then "inline" else "display"
- case writerHTMLMathMethod opts of
- LaTeXMathML _ ->
- -- putting LaTeXMathML in container with class "LaTeX" prevents
- -- non-math elements on the page from being treated as math by
- -- the javascript
- return $ H.span ! A.class_ "LaTeX" $
- case t of
- InlineMath -> toHtml ("$" ++ str ++ "$")
- DisplayMath -> toHtml ("$$" ++ str ++ "$$")
- JsMath _ -> do
- let m = preEscapedString str
- return $ case t of
- InlineMath -> H.span ! A.class_ mathClass $ m
- DisplayMath -> H.div ! A.class_ mathClass $ m
- WebTeX url -> do
- let imtag = if html5 then H5.img else H.img
- let m = imtag ! A.style "vertical-align:middle"
- ! A.src (toValue $ url ++ urlEncode str)
- ! A.alt (toValue str)
- ! A.title (toValue str)
- let brtag = if html5 then H5.br else H.br
- return $ case t of
- InlineMath -> m
- DisplayMath -> brtag >> m >> brtag
- GladTeX ->
- return $ case t of
- InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
- DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
- MathML -> do
- let conf = useShortEmptyTags (const False)
- defaultConfigPP
- res <- lift $ convertMath writeMathML t str
- case res of
- Right r -> return $ preEscapedString $
- ppcElement conf (annotateMML r str)
- Left il -> (H.span ! A.class_ mathClass) <$>
- inlineToHtml opts il
- MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $
- case t of
- InlineMath -> "\\(" ++ str ++ "\\)"
- DisplayMath -> "\\[" ++ str ++ "\\]"
- KaTeX _ _ -> return $ H.span ! A.class_ mathClass $
- toHtml (case t of
- InlineMath -> str
- DisplayMath -> "\\displaystyle " ++ str)
- PlainMath -> do
- x <- lift (texMathToInlines t str) >>= inlineListToHtml opts
- let m = H.span ! A.class_ mathClass $ x
- let brtag = if html5 then H5.br else H.br
- return $ case t of
- InlineMath -> m
- DisplayMath -> brtag >> m >> brtag
- (RawInline f str)
- | f == Format "html" -> return $ preEscapedString str
- | otherwise -> do
- report $ InlineNotRendered inline
- return mempty
- (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do
- linkText <- inlineListToHtml opts txt
- lift $ obfuscateLink opts attr linkText s
- (Link attr txt (s,tit)) -> do
- linkText <- inlineListToHtml opts txt
- slideVariant <- gets stSlideVariant
- let s' = case s of
- '#':xs -> let prefix = if slideVariant == RevealJsSlides
- then "/"
- else writerIdentifierPrefix opts
- in '#' : prefix ++ xs
- _ -> s
- let link = H.a ! A.href (toValue s') $ linkText
- let link' = if txt == [Str (unEscapeString s)]
- then link ! A.class_ "uri"
- else link
- let link'' = addAttrs opts attr link'
- return $ if null tit
- then link''
- else link'' ! A.title (toValue tit)
- (Image attr txt (s,tit)) | treatAsImage s -> do
- let alternate' = stringify txt
- let attributes = [A.src $ toValue s] ++
- [A.title $ toValue tit | not (null tit)] ++
- [A.alt $ toValue alternate' | not (null txt)] ++
- imgAttrsToHtml opts attr
- let tag = if html5 then H5.img else H.img
- return $ foldl (!) tag attributes
- -- note: null title included, as in Markdown.pl
- (Image attr _ (s,tit)) -> do
- let attributes = [A.src $ toValue s] ++
- [A.title $ toValue tit | not (null tit)] ++
- imgAttrsToHtml opts attr
- return $ foldl (!) H5.embed attributes
- -- note: null title included, as in Markdown.pl
- (Note contents) -> do
- notes <- gets stNotes
- let number = (length notes) + 1
- let ref = show number
- htmlContents <- blockListToNote opts ref contents
- epubVersion <- gets stEPUBVersion
- -- push contents onto front of notes
- modify $ \st -> st {stNotes = (htmlContents:notes)}
- slideVariant <- gets stSlideVariant
- let revealSlash = ['/' | slideVariant == RevealJsSlides]
- let link = H.a ! A.href (toValue $ "#" ++
- revealSlash ++
- writerIdentifierPrefix opts ++ "fn" ++ ref)
- ! A.class_ "footnoteRef"
- ! prefixedId opts ("fnref" ++ ref)
- $ (if isJust epubVersion
- then id
- else H.sup)
- $ toHtml ref
- return $ case epubVersion of
- Just EPUB3 -> link ! customAttribute "epub:type" "noteref"
- _ -> link
- (Cite cits il)-> do contents <- inlineListToHtml opts il
- let citationIds = unwords $ map citationId cits
- let result = H.span ! A.class_ "citation" $ contents
- return $ if html5
- then result ! customAttribute "data-cites" (toValue citationIds)
- else result
-
-blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html
-blockListToNote opts ref blocks =
- -- If last block is Para or Plain, include the backlink at the end of
- -- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])]
- blocks' = if null blocks
- then []
- else let lastBlock = last blocks
- otherBlocks = init blocks
- in case lastBlock of
- (Para lst) -> otherBlocks ++
- [Para (lst ++ backlink)]
- (Plain lst) -> otherBlocks ++
- [Plain (lst ++ backlink)]
- _ -> otherBlocks ++ [lastBlock,
- Plain backlink]
- in do contents <- blockListToHtml opts blocks'
- let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents
- epubVersion <- gets stEPUBVersion
- let noteItem' = case epubVersion of
- Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote"
- _ -> noteItem
- return $ nl opts >> noteItem'
-
--- Javascript snippet to render all KaTeX elements
-renderKaTeX :: String
-renderKaTeX = unlines [
- "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");"
- , "for (var i=0; i < mathElements.length; i++)"
- , "{"
- , " var texText = mathElements[i].firstChild"
- , " katex.render(texText.data, mathElements[i])"
- , "}}"
- ]
-
-isMathEnvironment :: String -> Bool
-isMathEnvironment s = "\\begin{" `isPrefixOf` s &&
- envName `elem` mathmlenvs
- where envName = takeWhile (/= '}') (drop 7 s)
- mathmlenvs = [ "align"
- , "align*"
- , "alignat"
- , "alignat*"
- , "aligned"
- , "alignedat"
- , "array"
- , "Bmatrix"
- , "bmatrix"
- , "cases"
- , "CD"
- , "eqnarray"
- , "eqnarray*"
- , "equation"
- , "equation*"
- , "gather"
- , "gather*"
- , "gathered"
- , "matrix"
- , "multline"
- , "multline*"
- , "pmatrix"
- , "smallmatrix"
- , "split"
- , "subarray"
- , "Vmatrix"
- , "vmatrix" ]
-
-allowsMathEnvironments :: HTMLMathMethod -> Bool
-allowsMathEnvironments (MathJax _) = True
-allowsMathEnvironments (MathML) = True
-allowsMathEnvironments (WebTeX _) = True
-allowsMathEnvironments _ = False
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
deleted file mode 100644
index 945e4a0f1..000000000
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ /dev/null
@@ -1,370 +0,0 @@
-{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
-{-
-Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Haddock
- Copyright : Copyright (C) 2014 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to haddock markup.
-
-Haddock: <http://www.haskell.org/haddock/doc/html/>
--}
-module Text.Pandoc.Writers.Haddock (writeHaddock) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Data.List ( intersperse, transpose )
-import Text.Pandoc.Pretty
-import Control.Monad.State
-import Text.Pandoc.Writers.Math (texMathToInlines)
-import Network.URI (isURI)
-import Data.Default
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
-type Notes = [[Block]]
-data WriterState = WriterState { stNotes :: Notes }
-instance Default WriterState
- where def = WriterState{ stNotes = [] }
-
--- | Convert Pandoc to Haddock.
-writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeHaddock opts document =
- evalStateT (pandocToHaddock opts{
- writerWrapText = writerWrapText opts } document) def
-
--- | Return haddock representation of document.
-pandocToHaddock :: PandocMonad m
- => WriterOptions -> Pandoc -> StateT WriterState m String
-pandocToHaddock opts (Pandoc meta blocks) = do
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- body <- blockListToHaddock opts blocks
- st <- get
- notes' <- notesToHaddock opts (reverse $ stNotes st)
- let render' :: Doc -> String
- render' = render colwidth
- let main = render' $ body <>
- (if isEmpty notes' then empty else blankline <> notes')
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToHaddock opts)
- (fmap (render colwidth) . inlineListToHaddock opts)
- meta
- let context = defField "body" main
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Return haddock representation of notes.
-notesToHaddock :: PandocMonad m
- => WriterOptions -> [[Block]] -> StateT WriterState m Doc
-notesToHaddock opts notes =
- if null notes
- then return empty
- else do
- contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes
- return $ text "#notes#" <> blankline <> contents
-
--- | Escape special characters for Haddock.
-escapeString :: String -> String
-escapeString = escapeStringUsing haddockEscapes
- where haddockEscapes = backslashEscapes "\\/'`\"@<"
-
--- | Convert Pandoc block element to haddock.
-blockToHaddock :: PandocMonad m
- => WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> StateT WriterState m Doc
-blockToHaddock _ Null = return empty
-blockToHaddock opts (Div _ ils) = do
- contents <- blockListToHaddock opts ils
- return $ contents <> blankline
-blockToHaddock opts (Plain inlines) = do
- contents <- inlineListToHaddock opts inlines
- return $ contents <> cr
--- title beginning with fig: indicates figure
-blockToHaddock opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
- blockToHaddock opts (Para [Image attr alt (src,tit)])
-blockToHaddock opts (Para inlines) =
- -- TODO: if it contains linebreaks, we need to use a @...@ block
- (<> blankline) `fmap` blockToHaddock opts (Plain inlines)
-blockToHaddock opts (LineBlock lns) =
- blockToHaddock opts $ linesToPara lns
-blockToHaddock _ b@(RawBlock f str)
- | f == "haddock" = do
- return $ text str <> text "\n"
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToHaddock opts HorizontalRule =
- return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline
-blockToHaddock opts (Header level (ident,_,_) inlines) = do
- contents <- inlineListToHaddock opts inlines
- let attr' = if null ident
- then empty
- else cr <> text "#" <> text ident <> text "#"
- return $ nowrap (text (replicate level '=') <> space <> contents)
- <> attr' <> blankline
-blockToHaddock _ (CodeBlock (_,_,_) str) =
- return $ prefixed "> " (text str) <> blankline
--- Nothing in haddock corresponds to block quotes:
-blockToHaddock opts (BlockQuote blocks) =
- blockListToHaddock opts blocks
--- Haddock doesn't have tables. Use haddock tables in code.
-blockToHaddock opts (Table caption aligns widths headers rows) = do
- caption' <- inlineListToHaddock opts caption
- let caption'' = if null caption
- then empty
- else blankline <> caption' <> blankline
- rawHeaders <- mapM (blockListToHaddock opts) headers
- rawRows <- mapM (mapM (blockListToHaddock opts)) rows
- let isSimple = all (==0) widths
- let isPlainBlock (Plain _) = True
- isPlainBlock _ = False
- let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
- (nst,tbl) <- case True of
- _ | isSimple -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | not hasBlocks -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | otherwise -> fmap (id,) $
- gridTable opts (all null headers) aligns widths
- rawHeaders rawRows
- return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline
-blockToHaddock opts (BulletList items) = do
- contents <- mapM (bulletListItemToHaddock opts) items
- return $ cat contents <> blankline
-blockToHaddock opts (OrderedList (start,_,delim) items) = do
- let attribs = (start, Decimal, delim)
- let markers = orderedListMarkers attribs
- let markers' = map (\m -> if length m < 3
- then m ++ replicate (3 - length m) ' '
- else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $
- zip markers' items
- return $ cat contents <> blankline
-blockToHaddock opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToHaddock opts) items
- return $ cat contents <> blankline
-
-pandocTable :: PandocMonad m
- => WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> StateT WriterState m Doc
-pandocTable opts headless aligns widths rawHeaders rawRows = do
- let isSimple = all (==0) widths
- let alignHeader alignment = case alignment of
- AlignLeft -> lblock
- AlignCenter -> cblock
- AlignRight -> rblock
- AlignDefault -> lblock
- let numChars = maximum . map offset
- let widthsInChars = if isSimple
- then map ((+2) . numChars)
- $ transpose (rawHeaders : rawRows)
- else map
- (floor . (fromIntegral (writerColumns opts) *))
- widths
- let makeRow = hcat . intersperse (lblock 1 (text " ")) .
- (zipWith3 alignHeader aligns widthsInChars)
- let rows' = map makeRow rawRows
- let head' = makeRow rawHeaders
- let maxRowHeight = maximum $ map height (head':rows')
- let underline = cat $ intersperse (text " ") $
- map (\width -> text (replicate width '-')) widthsInChars
- let border = if maxRowHeight > 1
- then text (replicate (sum widthsInChars +
- length widthsInChars - 1) '-')
- else if headless
- then underline
- else empty
- let head'' = if headless
- then empty
- else border <> cr <> head'
- let body = if maxRowHeight > 1
- then vsep rows'
- else vcat rows'
- let bottom = if headless
- then underline
- else border
- return $ head'' $$ underline $$ body $$ bottom
-
-gridTable :: PandocMonad m
- => WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> StateT WriterState m Doc
-gridTable opts headless _aligns widths headers' rawRows = do
- let numcols = length headers'
- let widths' = if all (==0) widths
- then replicate numcols (1.0 / fromIntegral numcols)
- else widths
- let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths'
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = maximum (map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = chomp $ hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map (makeRow . map chomp) rawRows
- let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '+'
- let body = vcat $ intersperse (border '-') rows'
- let head'' = if headless
- then empty
- else head' $$ border '='
- return $ border '-' $$ head'' $$ body $$ border '-'
-
--- | Convert bullet list item (list of blocks) to haddock
-bulletListItemToHaddock :: PandocMonad m
- => WriterOptions -> [Block] -> StateT WriterState m Doc
-bulletListItemToHaddock opts items = do
- contents <- blockListToHaddock opts items
- let sps = replicate (writerTabStop opts - 2) ' '
- let start = text ('-' : ' ' : sps)
- -- remove trailing blank line if it is a tight list
- let contents' = case reverse items of
- (BulletList xs:_) | isTightList xs ->
- chomp contents <> cr
- (OrderedList _ xs:_) | isTightList xs ->
- chomp contents <> cr
- _ -> contents
- return $ hang (writerTabStop opts) start $ contents' <> cr
-
--- | Convert ordered list item (a list of blocks) to haddock
-orderedListItemToHaddock :: PandocMonad m
- => WriterOptions -- ^ options
- -> String -- ^ list item marker
- -> [Block] -- ^ list item (list of blocks)
- -> StateT WriterState m Doc
-orderedListItemToHaddock opts marker items = do
- contents <- blockListToHaddock opts items
- let sps = case length marker - writerTabStop opts of
- n | n > 0 -> text $ replicate n ' '
- _ -> text " "
- let start = text marker <> sps
- return $ hang (writerTabStop opts) start $ contents <> cr
-
--- | Convert definition list item (label, list of blocks) to haddock
-definitionListItemToHaddock :: PandocMonad m
- => WriterOptions
- -> ([Inline],[[Block]])
- -> StateT WriterState m Doc
-definitionListItemToHaddock opts (label, defs) = do
- labelText <- inlineListToHaddock opts label
- defs' <- mapM (mapM (blockToHaddock opts)) defs
- let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs'
- return $ nowrap (brackets labelText) <> cr <> contents <> cr
-
--- | Convert list of Pandoc block elements to haddock
-blockListToHaddock :: PandocMonad m
- => WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> StateT WriterState m Doc
-blockListToHaddock opts blocks =
- mapM (blockToHaddock opts) blocks >>= return . cat
-
--- | Convert list of Pandoc inline elements to haddock.
-inlineListToHaddock :: PandocMonad m
- => WriterOptions -> [Inline] -> StateT WriterState m Doc
-inlineListToHaddock opts lst =
- mapM (inlineToHaddock opts) lst >>= return . cat
-
--- | Convert Pandoc inline element to haddock.
-inlineToHaddock :: PandocMonad m
- => WriterOptions -> Inline -> StateT WriterState m Doc
-inlineToHaddock opts (Span (ident,_,_) ils) = do
- contents <- inlineListToHaddock opts ils
- if not (null ident) && null ils
- then return $ "#" <> text ident <> "#"
- else return contents
-inlineToHaddock opts (Emph lst) = do
- contents <- inlineListToHaddock opts lst
- return $ "/" <> contents <> "/"
-inlineToHaddock opts (Strong lst) = do
- contents <- inlineListToHaddock opts lst
- return $ "__" <> contents <> "__"
-inlineToHaddock opts (Strikeout lst) = do
- contents <- inlineListToHaddock opts lst
- -- not supported in haddock, but we fake it:
- return $ "~~" <> contents <> "~~"
--- not supported in haddock:
-inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst
--- not supported in haddock:
-inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst
--- not supported in haddock:
-inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst
-inlineToHaddock opts (Quoted SingleQuote lst) = do
- contents <- inlineListToHaddock opts lst
- return $ "‘" <> contents <> "’"
-inlineToHaddock opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToHaddock opts lst
- return $ "“" <> contents <> "”"
-inlineToHaddock _ (Code _ str) =
- return $ "@" <> text (escapeString str) <> "@"
-inlineToHaddock _ (Str str) = do
- return $ text $ escapeString str
-inlineToHaddock opts (Math mt str) = do
- let adjust x = case mt of
- DisplayMath -> cr <> x <> cr
- InlineMath -> x
- adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts)
-inlineToHaddock _ il@(RawInline f str)
- | f == "haddock" = return $ text str
- | otherwise = do
- report $ InlineNotRendered il
- return empty
--- no line break in haddock (see above on CodeBlock)
-inlineToHaddock _ LineBreak = return cr
-inlineToHaddock opts SoftBreak =
- case writerWrapText opts of
- WrapAuto -> return space
- WrapNone -> return space
- WrapPreserve -> return cr
-inlineToHaddock _ Space = return space
-inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst
-inlineToHaddock _ (Link _ txt (src, _)) = do
- let linktext = text $ escapeString $ stringify txt
- let useAuto = isURI src &&
- case txt of
- [Str s] | escapeURI s == src -> True
- _ -> False
- return $ nowrap $ "<" <> text src <>
- (if useAuto then empty else space <> linktext) <> ">"
-inlineToHaddock opts (Image attr alternate (source, tit)) = do
- linkhaddock <- inlineToHaddock opts (Link attr alternate (source, tit))
- return $ "<" <> linkhaddock <> ">"
--- haddock doesn't have notes, but we can fake it:
-inlineToHaddock opts (Note contents) = do
- modify (\st -> st{ stNotes = contents : stNotes st })
- st <- get
- let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st)
- return $ "<#notes [" <> ref <> "]>"
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
deleted file mode 100644
index efec17d26..000000000
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ /dev/null
@@ -1,584 +0,0 @@
-{-# LANGUAGE OverloadedStrings, FlexibleContexts, ScopedTypeVariables #-}
-
-{- |
- Module : Text.Pandoc.Writers.ICML
- Copyright : Copyright (C) 2013-2016 github.com/mb21
- License : GNU GPL, version 2 or above
-
- Stability : alpha
-
-Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format
-which is a subset of the zipped IDML format for which the documentation is
-available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf
-InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated
-into InDesign with File -> Place.
--}
-module Text.Pandoc.Writers.ICML (writeICML) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Error (PandocError)
-import Text.Pandoc.XML
-import Text.Pandoc.Writers.Math (texMathToInlines)
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Shared (linesToPara, splitBy)
-import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse)
-import Data.Text as Text (breakOnAll, pack)
-import Control.Monad.State
-import Control.Monad.Except (runExceptT)
-import Network.URI (isURI)
-import qualified Data.Set as Set
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-import qualified Text.Pandoc.Class as P
-
-type Style = [String]
-type Hyperlink = [(Int, String)]
-
-data WriterState = WriterState{
- blockStyles :: Set.Set String
- , inlineStyles :: Set.Set String
- , links :: Hyperlink
- , listDepth :: Int
- , maxListDepth :: Int
- }
-
-type WS m = StateT WriterState m
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState{
- blockStyles = Set.empty
- , inlineStyles = Set.empty
- , links = []
- , listDepth = 1
- , maxListDepth = 0
- }
-
--- inline names (appear in InDesign's character styles pane)
-emphName :: String
-strongName :: String
-strikeoutName :: String
-superscriptName :: String
-subscriptName :: String
-smallCapsName :: String
-codeName :: String
-linkName :: String
-emphName = "Italic"
-strongName = "Bold"
-strikeoutName = "Strikeout"
-superscriptName = "Superscript"
-subscriptName = "Subscript"
-smallCapsName = "SmallCaps"
-codeName = "Code"
-linkName = "Link"
-
--- block element names (appear in InDesign's paragraph styles pane)
-paragraphName :: String
-figureName :: String
-imgCaptionName :: String
-codeBlockName :: String
-blockQuoteName :: String
-orderedListName :: String
-bulletListName :: String
-defListTermName :: String
-defListDefName :: String
-headerName :: String
-tableName :: String
-tableHeaderName :: String
-tableCaptionName :: String
-alignLeftName :: String
-alignRightName :: String
-alignCenterName :: String
-firstListItemName :: String
-beginsWithName :: String
-lowerRomanName :: String
-upperRomanName :: String
-lowerAlphaName :: String
-upperAlphaName :: String
-subListParName :: String
-footnoteName :: String
-citeName :: String
-paragraphName = "Paragraph"
-figureName = "Figure"
-imgCaptionName = "Caption"
-codeBlockName = "CodeBlock"
-blockQuoteName = "Blockquote"
-orderedListName = "NumList"
-bulletListName = "BulList"
-defListTermName = "DefListTerm"
-defListDefName = "DefListDef"
-headerName = "Header"
-tableName = "TablePar"
-tableHeaderName = "TableHeader"
-tableCaptionName = "TableCaption"
-alignLeftName = "LeftAlign"
-alignRightName = "RightAlign"
-alignCenterName = "CenterAlign"
-firstListItemName = "first"
-beginsWithName = "beginsWith-"
-lowerRomanName = "lowerRoman"
-upperRomanName = "upperRoman"
-lowerAlphaName = "lowerAlpha"
-upperAlphaName = "upperAlpha"
-subListParName = "subParagraph"
-footnoteName = "Footnote"
-citeName = "Cite"
-
--- | Convert Pandoc document to string in ICML format.
-writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeICML opts (Pandoc meta blocks) = do
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- render' = render colwidth
- renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState
- metadata <- metaToJSON opts
- (renderMeta blocksToICML)
- (renderMeta inlinesToICML)
- meta
- (doc, st) <- runStateT (blocksToICML opts [] blocks) defaultWriterState
- let main = render' doc
- context = defField "body" main
- $ defField "charStyles" (render' $ charStylesToDoc st)
- $ defField "parStyles" (render' $ parStylesToDoc st)
- $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
- $ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
--- | Auxilary functions for parStylesToDoc and charStylesToDoc.
-contains :: String -> (String, (String, String)) -> [(String, String)]
-contains s rule =
- if isInfixOf (fst rule) s
- then [snd rule]
- else []
-
--- | The monospaced font to use as default.
-monospacedFont :: Doc
-monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New"
-
--- | How much to indent blockquotes etc.
-defaultIndent :: Int
-defaultIndent = 20
-
--- | How much to indent numbered lists before the number.
-defaultListIndent :: Int
-defaultListIndent = 10
-
--- other constants
-lineSeparator :: String
-lineSeparator = "&#x2028;"
-
--- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles.
-parStylesToDoc :: WriterState -> Doc
-parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
- where
- makeStyle s =
- let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str)
- attrs = concat $ map (contains s) $ [
- (defListTermName, ("BulletsAndNumberingListType", "BulletList"))
- , (defListTermName, ("FontStyle", "Bold"))
- , (tableHeaderName, ("FontStyle", "Bold"))
- , (alignLeftName, ("Justification", "LeftAlign"))
- , (alignRightName, ("Justification", "RightAlign"))
- , (alignCenterName, ("Justification", "CenterAlign"))
- , (headerName++"1", ("PointSize", "36"))
- , (headerName++"2", ("PointSize", "30"))
- , (headerName++"3", ("PointSize", "24"))
- , (headerName++"4", ("PointSize", "18"))
- , (headerName++"5", ("PointSize", "14"))
- ]
- -- what is the most nested list type, if any?
- (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s
- where
- findList [] = (False, False)
- findList (x:xs) | x == bulletListName = (True, False)
- | x == orderedListName = (False, True)
- | otherwise = findList xs
- nBuls = countSubStrs bulletListName s
- nOrds = countSubStrs orderedListName s
- attrs' = numbering ++ listType ++ indent ++ attrs
- where
- numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)]
- | otherwise = []
- listType | isOrderedList && (not $ isInfixOf subListParName s)
- = [("BulletsAndNumberingListType", "NumberedList")]
- | isBulletList && (not $ isInfixOf subListParName s)
- = [("BulletsAndNumberingListType", "BulletList")]
- | otherwise = []
- indent = [("LeftIndent", show indt)]
- where
- nBlockQuotes = countSubStrs blockQuoteName s
- nDefLists = countSubStrs defListDefName s
- indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists)
- props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm)
- where
- font = if isInfixOf codeBlockName s
- then monospacedFont
- else empty
- basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font
- tabList = if isBulletList
- then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")]
- $ vcat [
- inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign"
- , inTags False "AlignmentCharacter" [("type","string")] $ text "."
- , selfClosingTag "Leader" [("type","string")]
- , inTags False "Position" [("type","unit")] $ text
- $ show $ defaultListIndent * (nBuls + nOrds)
- ]
- else empty
- makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name)
- numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..."
- | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..."
- | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..."
- | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..."
- | otherwise = empty
- in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props
-
--- | Convert a WriterState with its inline styles to the ICML listing of Character Styles.
-charStylesToDoc :: WriterState -> Doc
-charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
- where
- makeStyle s =
- let attrs = concat $ map (contains s) [
- (strikeoutName, ("StrikeThru", "true"))
- , (superscriptName, ("Position", "Superscript"))
- , (subscriptName, ("Position", "Subscript"))
- , (smallCapsName, ("Capitalization", "SmallCaps"))
- ]
- attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs
- | isInfixOf strongName s = ("FontStyle", "Bold") : attrs
- | isInfixOf emphName s = ("FontStyle", "Italic") : attrs
- | otherwise = attrs
- props = inTags True "Properties" [] $
- inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font
- where
- font =
- if isInfixOf codeName s
- then monospacedFont
- else empty
- in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props
-
--- | Escape colon characters as %3a
-escapeColons :: String -> String
-escapeColons (x:xs)
- | x == ':' = "%3a" ++ escapeColons xs
- | otherwise = x : escapeColons xs
-escapeColons [] = []
-
--- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks.
-hyperlinksToDoc :: Hyperlink -> Doc
-hyperlinksToDoc [] = empty
-hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
- where
- hyp (ident, url) = hdest $$ hlink
- where
- hdest = selfClosingTag "HyperlinkURLDestination"
- [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6
- hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url),
- ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")]
- $ inTags True "Properties" []
- $ inTags False "BorderColor" [("type","enumeration")] (text "Black")
- $$ (inTags False "Destination" [("type","object")]
- $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6
-
-
--- | Convert a list of Pandoc blocks to ICML.
-blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
-blocksToICML opts style lst = do
- docs <- mapM (blockToICML opts style) lst
- return $ intersperseBrs docs
-
--- | Convert a Pandoc block element to ICML.
-blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc
-blockToICML opts style (Plain lst) = parStyle opts style lst
--- title beginning with fig: indicates that the image is a figure
-blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do
- figure <- parStyle opts (figureName:style) img
- caption <- parStyle opts (imgCaptionName:style) txt
- return $ intersperseBrs [figure, caption]
-blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
-blockToICML opts style (LineBlock lns) =
- blockToICML opts style $ linesToPara lns
-blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
-blockToICML _ _ b@(RawBlock f str)
- | f == Format "icml" = return $ text str
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
-blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
-blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
-blockToICML opts style (DefinitionList lst) = intersperseBrs `fmap` mapM (definitionListItemToICML opts style) lst
-blockToICML opts style (Header lvl _ lst) =
- let stl = (headerName ++ show lvl):style
- in parStyle opts stl lst
-blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead
-blockToICML opts style (Table caption aligns widths headers rows) =
- let style' = tableName : style
- noHeader = all null headers
- nrHeaders = if noHeader
- then "0"
- else "1"
- nrRows = length rows
- nrCols = if null rows
- then 0
- else length $ head rows
- rowsToICML [] _ = return empty
- rowsToICML (col:rest) rowNr =
- liftM2 ($$) (colsToICML col aligns rowNr (0::Int)) $ rowsToICML rest (rowNr+1)
- colsToICML [] _ _ _ = return empty
- colsToICML _ [] _ _ = return empty
- colsToICML (cell:rest) (alig:restAligns) rowNr colNr = do
- let stl = if rowNr == 0 && not noHeader
- then tableHeaderName:style'
- else style'
- stl' | alig == AlignLeft = alignLeftName : stl
- | alig == AlignRight = alignRightName : stl
- | alig == AlignCenter = alignCenterName : stl
- | otherwise = stl
- c <- blocksToICML opts stl' cell
- let cl = return $ inTags True "Cell"
- [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c
- liftM2 ($$) cl $ colsToICML rest restAligns rowNr (colNr+1)
- in do
- let tabl = if noHeader
- then rows
- else headers:rows
- cells <- rowsToICML tabl (0::Int)
- let colWidths w = if w > 0
- then [("SingleColumnWidth",show $ 500 * w)]
- else []
- let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup)
- let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths
- let tableDoc = return $ inTags True "Table" [
- ("AppliedTableStyle","TableStyle/Table")
- , ("HeaderRowCount", nrHeaders)
- , ("BodyRowCount", show nrRows)
- , ("ColumnCount", show nrCols)
- ] (colDescs $$ cells)
- liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption
-blockToICML opts style (Div _ lst) = blocksToICML opts style lst
-blockToICML _ _ Null = return empty
-
--- | Convert a list of lists of blocks to ICML list items.
-listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc
-listItemsToICML _ _ _ _ [] = return empty
-listItemsToICML opts listType style attribs (first:rest) = do
- st <- get
- put st{ listDepth = 1 + listDepth st}
- let stl = listType:style
- let f = listItemToICML opts stl True attribs first
- let r = map (listItemToICML opts stl False attribs) rest
- docs <- sequence $ f:r
- s <- get
- let maxD = max (maxListDepth s) (listDepth s)
- put s{ listDepth = 1, maxListDepth = maxD }
- return $ intersperseBrs docs
-
--- | Convert a list of blocks to ICML list items.
-listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc
-listItemToICML opts style isFirst attribs item =
- let makeNumbStart (Just (beginsWith, numbStl, _)) =
- let doN DefaultStyle = []
- doN LowerRoman = [lowerRomanName]
- doN UpperRoman = [upperRomanName]
- doN LowerAlpha = [lowerAlphaName]
- doN UpperAlpha = [upperAlphaName]
- doN _ = []
- bw = if beginsWith > 1
- then [beginsWithName ++ show beginsWith]
- else []
- in doN numbStl ++ bw
- makeNumbStart Nothing = []
- stl = if isFirst
- then firstListItemName:style
- else style
- stl' = makeNumbStart attribs ++ stl
- in if length item > 1
- then do
- let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst
- insertTab block = blockToICML opts style block
- f <- blockToICML opts stl' $ head item
- r <- mapM insertTab $ tail item
- return $ intersperseBrs (f : r)
- else blocksToICML opts stl' item
-
-definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc
-definitionListItemToICML opts style (term,defs) = do
- term' <- parStyle opts (defListTermName:style) term
- defs' <- mapM (blocksToICML opts (defListDefName:style)) defs
- return $ intersperseBrs $ (term' : defs')
-
-
--- | Convert a list of inline elements to ICML.
-inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
-inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
-
--- | Convert an inline element to ICML.
-inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc
-inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str
-inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst
-inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst
-inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst
-inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst
-inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst
-inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
-inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
-inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
-inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst
-inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
-inlineToICML _ style Space = charStyle style space
-inlineToICML opts style SoftBreak =
- case writerWrapText opts of
- WrapAuto -> charStyle style space
- WrapNone -> charStyle style space
- WrapPreserve -> charStyle style cr
-inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
-inlineToICML opts style (Math mt str) =
- lift (texMathToInlines mt str) >>=
- (fmap cat . mapM (inlineToICML opts style))
-inlineToICML _ _ il@(RawInline f str)
- | f == Format "icml" = return $ text str
- | otherwise = do
- report $ InlineNotRendered il
- return empty
-inlineToICML opts style (Link _ lst (url, title)) = do
- content <- inlinesToICML opts (linkName:style) lst
- state $ \st ->
- let ident = if null $ links st
- then 1::Int
- else 1 + (fst $ head $ links st)
- newst = st{ links = (ident, url):(links st) }
- cont = inTags True "HyperlinkTextSource"
- [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content
- in (cont, newst)
-inlineToICML opts style (Image attr _ target) = imageICML opts style attr target
-inlineToICML opts style (Note lst) = footnoteToICML opts style lst
-inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst
-
--- | Convert a list of block elements to an ICML footnote.
-footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
-footnoteToICML opts style lst =
- let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls
- insertTab block = blockToICML opts (footnoteName:style) block
- in do
- contents <- mapM insertTab lst
- let number = inTags True "ParagraphStyleRange" [] $
- inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>"
- return $ inTags True "CharacterStyleRange"
- [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")]
- $ inTags True "Footnote" [] $ number $$ intersperseBrs contents
-
--- | Auxiliary function to merge Space elements into the adjacent Strs.
-mergeSpaces :: [Inline] -> [Inline]
-mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x =
- mergeSpaces $ Str(s++" "++s') : xs
-mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs
-mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs
-mergeSpaces (x:xs) = x : (mergeSpaces xs)
-mergeSpaces [] = []
-
-isSp :: Inline -> Bool
-isSp Space = True
-isSp SoftBreak = True
-isSp _ = False
-
--- | Intersperse line breaks
-intersperseBrs :: [Doc] -> Doc
-intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty)
-
--- | Wrap a list of inline elements in an ICML Paragraph Style
-parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
-parStyle opts style lst =
- let slipIn x y = if null y
- then x
- else x ++ " > " ++ y
- stlStr = foldr slipIn [] $ reverse style
- stl = if null stlStr
- then ""
- else "ParagraphStyle/" ++ stlStr
- attrs = ("AppliedParagraphStyle", stl)
- attrs' = if firstListItemName `elem` style
- then let ats = attrs : [("NumberingContinue", "false")]
- begins = filter (isPrefixOf beginsWithName) style
- in if null begins
- then ats
- else let i = maybe "" id $ stripPrefix beginsWithName $ head begins
- in ("NumberingStartAt", i) : ats
- else [attrs]
- in do
- content <- inlinesToICML opts [] lst
- let cont = inTags True "ParagraphStyleRange" attrs' content
- state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st })
-
--- | Wrap a Doc in an ICML Character Style.
-charStyle :: PandocMonad m => Style -> Doc -> WS m Doc
-charStyle style content =
- let (stlStr, attrs) = styleToStrAttr style
- doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
- in do
- state $ \st ->
- let styles = if null stlStr
- then st
- else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
- in (doc, styles)
-
--- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute.
-styleToStrAttr :: Style -> (String, [(String, String)])
-styleToStrAttr style =
- let stlStr = unwords $ Set.toAscList $ Set.fromList style
- stl = if null style
- then "$ID/NormalCharacterStyle"
- else "CharacterStyle/" ++ stlStr
- attrs = [("AppliedCharacterStyle", stl)]
- in (stlStr, attrs)
-
--- | Assemble an ICML Image.
-imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc
-imageICML opts style attr (src, _) = do
- res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
- imgS <- case res of
- Left (_ :: PandocError) -> do
- report $ CouldNotFetchResource src ""
- return def
- Right (img, _) -> do
- case imageSize img of
- Right size -> return size
- Left msg -> do
- report $ CouldNotDetermineImageSize src msg
- return def
- let (ow, oh) = sizeInPoints imgS
- (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS
- hw = showFl $ ow / 2
- hh = showFl $ oh / 2
- scale = showFl (imgWidth / ow) ++ " 0 0 " ++ showFl (imgHeight / oh)
- src' = if isURI src then src else "file:" ++ src
- (stlStr, attrs) = styleToStrAttr style
- props = inTags True "Properties" [] $ inTags True "PathGeometry" []
- $ inTags True "GeometryPathType" [("PathOpen","false")]
- $ inTags True "PathPointArray" []
- $ vcat [
- selfClosingTag "PathPointType" [("Anchor", "-"++hw++" -"++hh),
- ("LeftDirection", "-"++hw++" -"++hh), ("RightDirection", "-"++hw++" -"++hh)]
- , selfClosingTag "PathPointType" [("Anchor", "-"++hw++" "++hh),
- ("LeftDirection", "-"++hw++" "++hh), ("RightDirection", "-"++hw++" "++hh)]
- , selfClosingTag "PathPointType" [("Anchor", hw++" "++hh),
- ("LeftDirection", hw++" "++hh), ("RightDirection", hw++" "++hh)]
- , selfClosingTag "PathPointType" [("Anchor", hw++" -"++hh),
- ("LeftDirection", hw++" -"++hh), ("RightDirection", hw++" -"++hh)]
- ]
- image = inTags True "Image"
- [("Self","ue6"), ("ItemTransform", scale++" -"++hw++" -"++hh)]
- $ vcat [
- inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
- , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')]
- ]
- doc = inTags True "CharacterStyleRange" attrs
- $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"),
- ("ItemTransform", scale++" "++hw++" -"++hh)]
- $ (props $$ image)
- state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } )
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
deleted file mode 100644
index ac2b5d758..000000000
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ /dev/null
@@ -1,1388 +0,0 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables,
- PatternGuards #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.LaTeX
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' format into LaTeX.
--}
-module Text.Pandoc.Writers.LaTeX (
- writeLaTeX
- , writeBeamer
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Templates
-import Text.Pandoc.Logging
-import Text.Printf ( printf )
-import Network.URI ( isURI, unEscapeString )
-import Data.Aeson (object, (.=), FromJSON)
-import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse,
- nub, nubBy, foldl' )
-import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit,
- ord, isAlphaNum )
-import Data.Maybe ( fromMaybe, isJust, catMaybes )
-import qualified Data.Text as T
-import Control.Applicative ((<|>))
-import Control.Monad.State
-import qualified Text.Parsec as P
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Slides
-import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
- formatLaTeXInline, formatLaTeXBlock,
- toListingsLanguage)
-import Text.Pandoc.Class (PandocMonad, report)
-
-data WriterState =
- WriterState { stInNote :: Bool -- true if we're in a note
- , stInQuote :: Bool -- true if in a blockquote
- , stInMinipage :: Bool -- true if in minipage
- , stInHeading :: Bool -- true if in a section heading
- , stNotes :: [Doc] -- notes in a minipage
- , stOLLevel :: Int -- level of ordered list nesting
- , stOptions :: WriterOptions -- writer options, so they don't have to be parameter
- , stVerbInNote :: Bool -- true if document has verbatim text in note
- , stTable :: Bool -- true if document has a table
- , stStrikeout :: Bool -- true if document has strikeout
- , stUrl :: Bool -- true if document has visible URL link
- , stGraphics :: Bool -- true if document contains images
- , stLHS :: Bool -- true if document has literate haskell code
- , stBook :: Bool -- true if document uses book or memoir class
- , stCsquotes :: Bool -- true if document uses csquotes
- , stHighlighting :: Bool -- true if document has highlighted code
- , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit
- , stInternalLinks :: [String] -- list of internal link targets
- , stUsesEuro :: Bool -- true if euro symbol used
- , stBeamer :: Bool -- produce beamer
- }
-
-startingState :: WriterOptions -> WriterState
-startingState options = WriterState {
- stInNote = False
- , stInQuote = False
- , stInMinipage = False
- , stInHeading = False
- , stNotes = []
- , stOLLevel = 1
- , stOptions = options
- , stVerbInNote = False
- , stTable = False
- , stStrikeout = False
- , stUrl = False
- , stGraphics = False
- , stLHS = False
- , stBook = (case writerTopLevelDivision options of
- TopLevelPart -> True
- TopLevelChapter -> True
- _ -> False)
- , stCsquotes = False
- , stHighlighting = False
- , stIncremental = writerIncremental options
- , stInternalLinks = []
- , stUsesEuro = False
- , stBeamer = False }
-
--- | Convert Pandoc to LaTeX.
-writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeLaTeX options document =
- evalStateT (pandocToLaTeX options document) $
- startingState options
-
--- | Convert Pandoc to LaTeX Beamer.
-writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeBeamer options document =
- evalStateT (pandocToLaTeX options document) $
- (startingState options){ stBeamer = True }
-
-type LW m = StateT WriterState m
-
-pandocToLaTeX :: PandocMonad m
- => WriterOptions -> Pandoc -> LW m String
-pandocToLaTeX options (Pandoc meta blocks) = do
- -- Strip off final 'references' header if --natbib or --biblatex
- let method = writerCiteMethod options
- let blocks' = if method == Biblatex || method == Natbib
- then case reverse blocks of
- (Div (_,["references"],_) _):xs -> reverse xs
- _ -> blocks
- else blocks
- -- see if there are internal links
- let isInternalLink (Link _ _ ('#':xs,_)) = [xs]
- isInternalLink _ = []
- modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
- let template = maybe "" id $ writerTemplate options
- -- set stBook depending on documentclass
- let colwidth = if writerWrapText options == WrapAuto
- then Just $ writerColumns options
- else Nothing
- metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToLaTeX)
- (fmap (render colwidth) . inlineListToLaTeX)
- meta
- let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
- let documentClass = case P.parse pDocumentClass "template" template of
- Right r -> r
- Left _ -> ""
- case lookup "documentclass" (writerVariables options) `mplus`
- fmap stringify (lookupMeta "documentclass" meta) of
- Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
- | otherwise -> return ()
- Nothing | documentClass `elem` bookClasses
- -> modify $ \s -> s{stBook = True}
- | otherwise -> return ()
- -- check for \usepackage...{csquotes}; if present, we'll use
- -- \enquote{...} for smart quotes:
- let headerIncludesField :: FromJSON a => Maybe a
- headerIncludesField = getField "header-includes" metadata
- let headerIncludes = fromMaybe [] $ mplus
- (fmap return headerIncludesField)
- headerIncludesField
- when (any (isInfixOf "{csquotes}") (template : headerIncludes)) $
- modify $ \s -> s{stCsquotes = True}
- let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
- (blocks', [])
- else case last blocks' of
- Header 1 _ il -> (init blocks', il)
- _ -> (blocks', [])
- beamer <- gets stBeamer
- blocks''' <- if beamer
- then toSlides blocks''
- else return blocks''
- body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'''
- (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
- let main = render colwidth $ vsep body
- st <- get
- titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
- authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
- let docLangs = nub $ query (extract "lang") blocks
- let hasStringValue x = isJust (getField x metadata :: Maybe String)
- let geometryFromMargins = intercalate [','] $ catMaybes $
- map (\(x,y) ->
- ((x ++ "=") ++) <$> getField y metadata)
- [("lmargin","margin-left")
- ,("rmargin","margin-right")
- ,("tmargin","margin-top")
- ,("bmargin","margin-bottom")
- ]
- let context = defField "toc" (writerTableOfContents options) $
- defField "toc-depth" (show (writerTOCDepth options -
- if stBook st
- then 1
- else 0)) $
- defField "body" main $
- defField "title-meta" titleMeta $
- defField "author-meta" (intercalate "; " authorsMeta) $
- defField "documentclass" (if beamer
- then ("beamer" :: String)
- else if stBook st
- then "book"
- else "article") $
- defField "verbatim-in-note" (stVerbInNote st) $
- defField "tables" (stTable st) $
- defField "strikeout" (stStrikeout st) $
- defField "url" (stUrl st) $
- defField "numbersections" (writerNumberSections options) $
- defField "lhs" (stLHS st) $
- defField "graphics" (stGraphics st) $
- defField "book-class" (stBook st) $
- defField "euro" (stUsesEuro st) $
- defField "listings" (writerListings options || stLHS st) $
- defField "beamer" beamer $
- (if stHighlighting st
- then case writerHighlightStyle options of
- Just sty ->
- defField "highlighting-macros"
- (styleToLaTeX sty)
- Nothing -> id
- else id) $
- (case writerCiteMethod options of
- Natbib -> defField "biblio-title" biblioTitle .
- defField "natbib" True
- Biblatex -> defField "biblio-title" biblioTitle .
- defField "biblatex" True
- _ -> id) $
- -- set lang to something so polyglossia/babel is included
- defField "lang" (if null docLangs then ""::String else "en") $
- defField "otherlangs" docLangs $
- defField "colorlinks" (any hasStringValue
- ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
- defField "dir" (if (null $ query (extract "dir") blocks)
- then ""::String
- else "ltr") $
- defField "section-titles" True $
- defField "geometry" geometryFromMargins $
- metadata
- let toPolyObj lang = object [ "name" .= T.pack name
- , "options" .= T.pack opts ]
- where
- (name, opts) = toPolyglossia lang
- let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
- otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
- let context' =
- defField "babel-lang" (toBabel lang)
- $ defField "babel-otherlangs" (map toBabel otherlangs)
- $ defField "babel-newcommands" (concatMap (\(poly, babel) ->
- -- \textspanish and \textgalician are already used by babel
- -- save them as \oritext... and let babel use that
- if poly `elem` ["spanish", "galician"]
- then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
- "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
- "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
- ++ poly ++ "}}\n" ++
- "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
- "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
- ++ poly ++ "}{##2}}}\n"
- else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
- ++ babel ++ "}{#2}}\n" ++
- "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{"
- ++ babel ++ "}}{\\end{otherlanguage}}\n"
- )
- -- eliminate duplicates that have same polyglossia name
- $ nubBy (\a b -> fst a == fst b)
- -- find polyglossia and babel names of languages used in the document
- $ map (\l ->
- let lng = splitBy (=='-') l
- in (fst $ toPolyglossia lng, toBabel lng)
- )
- docLangs )
- $ defField "polyglossia-lang" (toPolyObj lang)
- $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
- $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
- Just "rtl" -> True
- _ -> False)
- $ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
-
--- | Convert Elements to LaTeX
-elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc
-elementToLaTeX _ (Blk block) = blockToLaTeX block
-elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do
- modify $ \s -> s{stInHeading = True}
- header' <- sectionHeader ("unnumbered" `elem` classes) id' level title'
- modify $ \s -> s{stInHeading = False}
- innerContents <- mapM (elementToLaTeX opts) elements
- return $ vsep (header' : innerContents)
-
-data StringContext = TextString
- | URLString
- | CodeString
- deriving (Eq)
-
--- escape things as needed for LaTeX
-stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String
-stringToLaTeX _ [] = return ""
-stringToLaTeX ctx (x:xs) = do
- opts <- gets stOptions
- rest <- stringToLaTeX ctx xs
- let ligatures = isEnabled Ext_smart opts && ctx == TextString
- let isUrl = ctx == URLString
- when (x == '€') $
- modify $ \st -> st{ stUsesEuro = True }
- return $
- case x of
- '€' -> "\\euro{}" ++ rest
- '{' -> "\\{" ++ rest
- '}' -> "\\}" ++ rest
- '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest
- '$' | not isUrl -> "\\$" ++ rest
- '%' -> "\\%" ++ rest
- '&' -> "\\&" ++ rest
- '_' | not isUrl -> "\\_" ++ rest
- '#' -> "\\#" ++ rest
- '-' | not isUrl -> case xs of
- -- prevent adjacent hyphens from forming ligatures
- ('-':_) -> "-\\/" ++ rest
- _ -> '-' : rest
- '~' | not isUrl -> "\\textasciitilde{}" ++ rest
- '^' -> "\\^{}" ++ rest
- '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows
- | otherwise -> "\\textbackslash{}" ++ rest
- '|' | not isUrl -> "\\textbar{}" ++ rest
- '<' -> "\\textless{}" ++ rest
- '>' -> "\\textgreater{}" ++ rest
- '[' -> "{[}" ++ rest -- to avoid interpretation as
- ']' -> "{]}" ++ rest -- optional arguments
- '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest
- '\160' -> "~" ++ rest
- '\x202F' -> "\\," ++ rest
- '\x2026' -> "\\ldots{}" ++ rest
- '\x2018' | ligatures -> "`" ++ rest
- '\x2019' | ligatures -> "'" ++ rest
- '\x201C' | ligatures -> "``" ++ rest
- '\x201D' | ligatures -> "''" ++ rest
- '\x2014' | ligatures -> "---" ++ rest
- '\x2013' | ligatures -> "--" ++ rest
- _ -> x : rest
-
-toLabel :: PandocMonad m => String -> LW m String
-toLabel z = go `fmap` stringToLaTeX URLString z
- where go [] = ""
- go (x:xs)
- | (isLetter x || isDigit x) && isAscii x = x:go xs
- | elem x ("_-+=:;." :: String) = x:go xs
- | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs
-
--- | Puts contents into LaTeX command.
-inCmd :: String -> Doc -> Doc
-inCmd cmd contents = char '\\' <> text cmd <> braces contents
-
-toSlides :: PandocMonad m => [Block] -> LW m [Block]
-toSlides bs = do
- opts <- gets stOptions
- let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts
- let bs' = prepSlides slideLevel bs
- concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
-
-elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block]
-elementToBeamer _slideLevel (Blk b) = return [b]
-elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
- | lvl > slideLevel = do
- bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
- return $ Para ( RawInline "latex" "\\begin{block}{"
- : tit ++ [RawInline "latex" "}"] )
- : bs ++ [RawBlock "latex" "\\end{block}"]
- | lvl < slideLevel = do
- bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
- return $ (Header lvl (ident,classes,kvs) tit) : bs
- | otherwise = do -- lvl == slideLevel
- -- note: [fragile] is required or verbatim breaks
- let hasCodeBlock (CodeBlock _ _) = [True]
- hasCodeBlock _ = []
- let hasCode (Code _ _) = [True]
- hasCode _ = []
- let fragile = "fragile" `elem` classes ||
- not (null $ query hasCodeBlock elts ++ query hasCode elts)
- let frameoptions = ["allowdisplaybreaks", "allowframebreaks",
- "b", "c", "t", "environment",
- "label", "plain", "shrink", "standout"]
- let optionslist = ["fragile" | fragile] ++
- [k | k <- classes, k `elem` frameoptions] ++
- [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions]
- let options = if null optionslist
- then ""
- else "[" ++ intercalate "," optionslist ++ "]"
- let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) :
- if tit == [Str "\0"] -- marker for hrule
- then []
- else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"]
- let slideEnd = RawBlock "latex" "\\end{frame}"
- -- now carve up slide into blocks if there are sections inside
- bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
- return $ slideStart : bs ++ [slideEnd]
-
-isListBlock :: Block -> Bool
-isListBlock (BulletList _) = True
-isListBlock (OrderedList _ _) = True
-isListBlock (DefinitionList _) = True
-isListBlock _ = False
-
-isLineBreakOrSpace :: Inline -> Bool
-isLineBreakOrSpace LineBreak = True
-isLineBreakOrSpace SoftBreak = True
-isLineBreakOrSpace Space = True
-isLineBreakOrSpace _ = False
-
--- | Convert Pandoc block element to LaTeX.
-blockToLaTeX :: PandocMonad m
- => Block -- ^ Block to convert
- -> LW m Doc
-blockToLaTeX Null = return empty
-blockToLaTeX (Div (identifier,classes,kvs) bs) = do
- beamer <- gets stBeamer
- ref <- toLabel identifier
- let linkAnchor = if null identifier
- then empty
- else "\\hypertarget" <> braces (text ref) <>
- braces empty
- let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
- let wrapDir = case lookup "dir" kvs of
- Just "rtl" -> align "RTL"
- Just "ltr" -> align "LTR"
- _ -> id
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> let (l, o) = toPolyglossiaEnv lng
- ops = if null o
- then ""
- else brackets $ text o
- in inCmd "begin" (text l) <> ops
- $$ blankline <> txt <> blankline
- $$ inCmd "end" (text l)
- Nothing -> txt
- wrapNotes txt = if beamer && "notes" `elem` classes
- then "\\note" <> braces txt -- speaker notes
- else linkAnchor $$ txt
- fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
-blockToLaTeX (Plain lst) =
- inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
--- title beginning with fig: indicates that the image is a figure
-blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do
- inNote <- gets stInNote
- modify $ \st -> st{ stInMinipage = True, stNotes = [] }
- capt <- inlineListToLaTeX txt
- notes <- gets stNotes
- modify $ \st -> st{ stInMinipage = False, stNotes = [] }
-
- -- We can't have footnotes in the list of figures, so remove them:
- captForLof <- if null notes
- then return empty
- else brackets <$> inlineListToLaTeX (walk deNote txt)
- img <- inlineToLaTeX (Image attr txt (src,tit))
- let footnotes = notesToLaTeX notes
- lab <- labelFor ident
- let caption = "\\caption" <> captForLof <> braces capt <> lab
- figure <- hypertarget ident (cr <>
- "\\begin{figure}" $$ "\\centering" $$ img $$
- caption $$ "\\end{figure}" <> cr)
- return $ if inNote
- -- can't have figures in notes
- then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}"
- else figure $$ footnotes
--- . . . indicates pause in beamer slides
-blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do
- beamer <- gets stBeamer
- if beamer
- then blockToLaTeX (RawBlock "latex" "\\pause")
- else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."]
-blockToLaTeX (Para lst) =
- inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-blockToLaTeX (LineBlock lns) = do
- blockToLaTeX $ linesToPara lns
-blockToLaTeX (BlockQuote lst) = do
- beamer <- gets stBeamer
- case lst of
- [b] | beamer && isListBlock b -> do
- oldIncremental <- gets stIncremental
- modify $ \s -> s{ stIncremental = not oldIncremental }
- result <- blockToLaTeX b
- modify $ \s -> s{ stIncremental = oldIncremental }
- return result
- _ -> do
- oldInQuote <- gets stInQuote
- modify (\s -> s{stInQuote = True})
- contents <- blockListToLaTeX lst
- modify (\s -> s{stInQuote = oldInQuote})
- return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
-blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
- opts <- gets stOptions
- ref <- toLabel identifier
- let linkAnchor = if null identifier
- then empty
- else "\\hypertarget" <> braces (text ref) <>
- braces ("\\label" <> braces (text ref))
- let lhsCodeBlock = do
- modify $ \s -> s{ stLHS = True }
- return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
- "\\end{code}") $$ cr
- let rawCodeBlock = do
- st <- get
- env <- if stInNote st
- then modify (\s -> s{ stVerbInNote = True }) >>
- return "Verbatim"
- else return "verbatim"
- return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$
- text str $$ text ("\\end{" ++ env ++ "}")) <> cr
- let listingsCodeBlock = do
- st <- get
- let params = if writerListings (stOptions st)
- then (case getListingsLanguage classes of
- Just l -> [ "language=" ++ mbBraced l ]
- Nothing -> []) ++
- [ "numbers=left" | "numberLines" `elem` classes
- || "number" `elem` classes
- || "number-lines" `elem` classes ] ++
- [ (if key == "startFrom"
- then "firstnumber"
- else key) ++ "=" ++ mbBraced attr |
- (key,attr) <- keyvalAttr ] ++
- (if identifier == ""
- then []
- else [ "label=" ++ ref ])
-
- else []
- printParams
- | null params = empty
- | otherwise = brackets $ hcat (intersperse ", "
- (map text params))
- return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
- "\\end{lstlisting}") $$ cr
- let highlightedCodeBlock =
- case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
- Nothing -> rawCodeBlock
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (flush $ linkAnchor $$ text (T.unpack h))
- case () of
- _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
- "literate" `elem` classes -> lhsCodeBlock
- | writerListings opts -> listingsCodeBlock
- | not (null classes) && isJust (writerHighlightStyle opts)
- -> highlightedCodeBlock
- | otherwise -> rawCodeBlock
-blockToLaTeX b@(RawBlock f x)
- | f == Format "latex" || f == Format "tex"
- = return $ text x
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToLaTeX (BulletList []) = return empty -- otherwise latex error
-blockToLaTeX (BulletList lst) = do
- incremental <- gets stIncremental
- beamer <- gets stBeamer
- let inc = if beamer && incremental then "[<+->]" else ""
- items <- mapM listItemToLaTeX lst
- let spacing = if isTightList lst
- then text "\\tightlist"
- else empty
- return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$
- "\\end{itemize}"
-blockToLaTeX (OrderedList _ []) = return empty -- otherwise latex error
-blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
- st <- get
- let inc = if stIncremental st then "[<+->]" else ""
- let oldlevel = stOLLevel st
- put $ st {stOLLevel = oldlevel + 1}
- items <- mapM listItemToLaTeX lst
- modify (\s -> s {stOLLevel = oldlevel})
- let tostyle x = case numstyle of
- Decimal -> "\\arabic" <> braces x
- UpperRoman -> "\\Roman" <> braces x
- LowerRoman -> "\\roman" <> braces x
- UpperAlpha -> "\\Alph" <> braces x
- LowerAlpha -> "\\alph" <> braces x
- Example -> "\\arabic" <> braces x
- DefaultStyle -> "\\arabic" <> braces x
- let todelim x = case numdelim of
- OneParen -> x <> ")"
- TwoParens -> parens x
- Period -> x <> "."
- _ -> x <> "."
- let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel)
- let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim
- then empty
- else "\\def" <> "\\label" <> enum <>
- braces (todelim $ tostyle enum)
- let resetcounter = if start == 1 || oldlevel > 4
- then empty
- else "\\setcounter" <> braces enum <>
- braces (text $ show $ start - 1)
- let spacing = if isTightList lst
- then text "\\tightlist"
- else empty
- return $ text ("\\begin{enumerate}" ++ inc)
- $$ stylecommand
- $$ resetcounter
- $$ spacing
- $$ vcat items
- $$ "\\end{enumerate}"
-blockToLaTeX (DefinitionList []) = return empty
-blockToLaTeX (DefinitionList lst) = do
- incremental <- gets stIncremental
- let inc = if incremental then "[<+->]" else ""
- items <- mapM defListItemToLaTeX lst
- let spacing = if all isTightList (map snd lst)
- then text "\\tightlist"
- else empty
- return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$
- "\\end{description}"
-blockToLaTeX HorizontalRule = return $
- "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"
-blockToLaTeX (Header level (id',classes,_) lst) = do
- modify $ \s -> s{stInHeading = True}
- hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst
- modify $ \s -> s{stInHeading = False}
- return hdr
-blockToLaTeX (Table caption aligns widths heads rows) = do
- headers <- if all null heads
- then return empty
- else do
- contents <- (tableRowToLaTeX True aligns widths) heads
- return ("\\toprule" $$ contents $$ "\\midrule")
- let endhead = if all null heads
- then empty
- else text "\\endhead"
- let endfirsthead = if all null heads
- then empty
- else text "\\endfirsthead"
- captionText <- inlineListToLaTeX caption
- let capt = if isEmpty captionText
- then empty
- else text "\\caption" <> braces captionText <> "\\tabularnewline"
- $$ headers
- $$ endfirsthead
- rows' <- mapM (tableRowToLaTeX False aligns widths) rows
- let colDescriptors = text $ concat $ map toColDescriptor aligns
- modify $ \s -> s{ stTable = True }
- return $ "\\begin{longtable}[]" <>
- braces ("@{}" <> colDescriptors <> "@{}")
- -- the @{} removes extra space at beginning and end
- $$ capt
- $$ (if all null heads then "\\toprule" else empty)
- $$ headers
- $$ endhead
- $$ vcat rows'
- $$ "\\bottomrule"
- $$ "\\end{longtable}"
-
-toColDescriptor :: Alignment -> String
-toColDescriptor align =
- case align of
- AlignLeft -> "l"
- AlignRight -> "r"
- AlignCenter -> "c"
- AlignDefault -> "l"
-
-blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc
-blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst
-
-tableRowToLaTeX :: PandocMonad m
- => Bool
- -> [Alignment]
- -> [Double]
- -> [[Block]]
- -> LW m Doc
-tableRowToLaTeX header aligns widths cols = do
- -- scale factor compensates for extra space between columns
- -- so the whole table isn't larger than columnwidth
- let scaleFactor = 0.97 ** fromIntegral (length aligns)
- let isSimple [Plain _] = True
- isSimple [Para _] = True
- isSimple [] = True
- isSimple _ = False
- -- simple tables have to have simple cells:
- let widths' = if not (all isSimple cols)
- then replicate (length aligns)
- (0.97 / fromIntegral (length aligns))
- else map (scaleFactor *) widths
- cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
- return $ hsep (intersperse "&" cells) <> "\\tabularnewline"
-
--- For simple latex tables (without minipages or parboxes),
--- we need to go to some lengths to get line breaks working:
--- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
-fixLineBreaks :: Block -> Block
-fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils
-fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
-fixLineBreaks x = x
-
-fixLineBreaks' :: [Inline] -> [Inline]
-fixLineBreaks' ils = case splitBy (== LineBreak) ils of
- [] -> []
- [xs] -> xs
- chunks -> RawInline "tex" "\\vtop{" :
- concatMap tohbox chunks ++
- [RawInline "tex" "}"]
- where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++
- [RawInline "tex" "}"]
-
--- We also change display math to inline math, since display
--- math breaks in simple tables.
-displayMathToInline :: Inline -> Inline
-displayMathToInline (Math DisplayMath x) = Math InlineMath x
-displayMathToInline x = x
-
-tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block])
- -> LW m Doc
-tableCellToLaTeX _ (0, _, blocks) =
- blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
-tableCellToLaTeX header (width, align, blocks) = do
- modify $ \st -> st{ stInMinipage = True, stNotes = [] }
- cellContents <- blockListToLaTeX blocks
- notes <- gets stNotes
- modify $ \st -> st{ stInMinipage = False, stNotes = [] }
- let valign = text $ if header then "[b]" else "[t]"
- let halign = case align of
- AlignLeft -> "\\raggedright"
- AlignRight -> "\\raggedleft"
- AlignCenter -> "\\centering"
- AlignDefault -> "\\raggedright"
- return $ ("\\begin{minipage}" <> valign <>
- braces (text (printf "%.2f\\columnwidth" width)) <>
- (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <>
- "\\end{minipage}") $$
- notesToLaTeX notes
-
-notesToLaTeX :: [Doc] -> Doc
-notesToLaTeX [] = empty
-notesToLaTeX ns = (case length ns of
- n | n > 1 -> "\\addtocounter" <>
- braces "footnote" <>
- braces (text $ show $ 1 - n)
- | otherwise -> empty)
- $$
- vcat (intersperse
- ("\\addtocounter" <> braces "footnote" <> braces "1")
- $ map (\x -> "\\footnotetext" <> braces x)
- $ reverse ns)
-
-listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc
-listItemToLaTeX lst
- -- we need to put some text before a header if it's the first
- -- element in an item. This will look ugly in LaTeX regardless, but
- -- this will keep the typesetter from throwing an error.
- | ((Header _ _ _) :_) <- lst =
- blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2)
- | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) .
- (nest 2)
-
-defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc
-defListItemToLaTeX (term, defs) = do
- term' <- inlineListToLaTeX term
- -- put braces around term if it contains an internal link,
- -- since otherwise we get bad bracket interactions: \item[\hyperref[..]
- let isInternalLink (Link _ _ ('#':_,_)) = True
- isInternalLink _ = False
- let term'' = if any isInternalLink term
- then braces term'
- else term'
- def' <- liftM vsep $ mapM blockListToLaTeX defs
- return $ case defs of
- (((Header _ _ _) : _) : _) ->
- "\\item" <> brackets term'' <> " ~ " $$ def'
- _ ->
- "\\item" <> brackets term'' $$ def'
-
--- | Craft the section header, inserting the secton reference, if supplied.
-sectionHeader :: PandocMonad m
- => Bool -- True for unnumbered
- -> [Char]
- -> Int
- -> [Inline]
- -> LW m Doc
-sectionHeader unnumbered ident level lst = do
- txt <- inlineListToLaTeX lst
- plain <- stringToLaTeX TextString $ concatMap stringify lst
- let removeInvalidInline (Note _) = []
- removeInvalidInline (Span (id', _, _) _) | not (null id') = []
- removeInvalidInline (Image _ _ _) = []
- removeInvalidInline x = [x]
- let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst
- txtNoNotes <- inlineListToLaTeX lstNoNotes
- -- footnotes in sections don't work (except for starred variants)
- -- unless you specify an optional argument:
- -- \section[mysec]{mysec\footnote{blah}}
- optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == []
- then return empty
- else do
- return $ brackets txtNoNotes
- let contents = if render Nothing txt == plain
- then braces txt
- else braces (text "\\texorpdfstring"
- <> braces txt
- <> braces (text plain))
- book <- gets stBook
- opts <- gets stOptions
- let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault
- then TopLevelChapter
- else writerTopLevelDivision opts
- beamer <- gets stBeamer
- let level' = if beamer &&
- topLevelDivision `elem` [TopLevelPart, TopLevelChapter]
- -- beamer has parts but no chapters
- then if level == 1 then -1 else level - 1
- else case topLevelDivision of
- TopLevelPart -> level - 2
- TopLevelChapter -> level - 1
- TopLevelSection -> level
- TopLevelDefault -> level
- let sectionType = case level' of
- -1 -> "part"
- 0 -> "chapter"
- 1 -> "section"
- 2 -> "subsection"
- 3 -> "subsubsection"
- 4 -> "paragraph"
- 5 -> "subparagraph"
- _ -> ""
- inQuote <- gets stInQuote
- let prefix = if inQuote && level' >= 4
- then text "\\mbox{}%"
- -- needed for \paragraph, \subparagraph in quote environment
- -- see http://tex.stackexchange.com/questions/169830/
- else empty
- lab <- labelFor ident
- let star = if unnumbered && level' < 4 then text "*" else empty
- let stuffing = star <> optional <> contents
- stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab
- return $ if level' > 5
- then txt
- else prefix $$ stuffing'
- $$ if unnumbered
- then "\\addcontentsline{toc}" <>
- braces (text sectionType) <>
- braces txtNoNotes
- else empty
-
-hypertarget :: PandocMonad m => String -> Doc -> LW m Doc
-hypertarget ident x = do
- ref <- text `fmap` toLabel ident
- internalLinks <- gets stInternalLinks
- return $
- if ident `elem` internalLinks
- then text "\\hypertarget"
- <> braces ref
- <> braces x
- else x
-
-labelFor :: PandocMonad m => String -> LW m Doc
-labelFor "" = return empty
-labelFor ident = do
- ref <- text `fmap` toLabel ident
- return $ text "\\label" <> braces ref
-
--- | Convert list of inline elements to LaTeX.
-inlineListToLaTeX :: PandocMonad m
- => [Inline] -- ^ Inlines to convert
- -> LW m Doc
-inlineListToLaTeX lst =
- mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst)
- >>= return . hcat
- -- nonbreaking spaces (~) in LaTeX don't work after line breaks,
- -- so we turn nbsps after hard breaks to \hspace commands.
- -- this is mostly used in verse.
- where fixLineInitialSpaces [] = []
- fixLineInitialSpaces (LineBreak : Str s@('\160':_) : xs) =
- LineBreak : fixNbsps s ++ fixLineInitialSpaces xs
- fixLineInitialSpaces (x:xs) = x : fixLineInitialSpaces xs
- fixNbsps s = let (ys,zs) = span (=='\160') s
- in replicate (length ys) hspace ++ [Str zs]
- hspace = RawInline "latex" "\\hspace*{0.333em}"
- -- linebreaks after blank lines cause problems:
- fixBreaks [] = []
- fixBreaks ys@(LineBreak : LineBreak : _) =
- case span (== LineBreak) ys of
- (lbs, rest) -> RawInline "latex"
- ("\\\\[" ++ show (length lbs) ++
- "\\baselineskip]") : fixBreaks rest
- fixBreaks (y:ys) = y : fixBreaks ys
-
-isQuoted :: Inline -> Bool
-isQuoted (Quoted _ _) = True
-isQuoted _ = False
-
--- | Convert inline element to LaTeX
-inlineToLaTeX :: PandocMonad m
- => Inline -- ^ Inline to convert
- -> LW m Doc
-inlineToLaTeX (Span (id',classes,kvs) ils) = do
- ref <- toLabel id'
- let linkAnchor = if null id'
- then empty
- else "\\protect\\hypertarget" <> braces (text ref) <>
- braces empty
- let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
- ["textnormal" | "csl-no-strong" `elem` classes ||
- "csl-no-smallcaps" `elem` classes] ++
- ["RL" | ("dir", "rtl") `elem` kvs] ++
- ["LR" | ("dir", "ltr") `elem` kvs] ++
- (case lookup "lang" kvs of
- Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng
- ops = if null o then "" else ("[" ++ o ++ "]")
- in ["text" ++ l ++ ops]
- Nothing -> [])
- contents <- inlineListToLaTeX ils
- return $ linkAnchor <>
- if null cmds
- then braces contents
- else foldr inCmd contents cmds
-inlineToLaTeX (Emph lst) =
- inlineListToLaTeX lst >>= return . inCmd "emph"
-inlineToLaTeX (Strong lst) =
- inlineListToLaTeX lst >>= return . inCmd "textbf"
-inlineToLaTeX (Strikeout lst) = do
- -- we need to protect VERB in an mbox or we get an error
- -- see #1294
- contents <- inlineListToLaTeX $ protectCode lst
- modify $ \s -> s{ stStrikeout = True }
- return $ inCmd "sout" contents
-inlineToLaTeX (Superscript lst) =
- inlineListToLaTeX lst >>= return . inCmd "textsuperscript"
-inlineToLaTeX (Subscript lst) = do
- inlineListToLaTeX lst >>= return . inCmd "textsubscript"
-inlineToLaTeX (SmallCaps lst) =
- inlineListToLaTeX lst >>= return . inCmd "textsc"
-inlineToLaTeX (Cite cits lst) = do
- st <- get
- let opts = stOptions st
- case writerCiteMethod opts of
- Natbib -> citationsToNatbib cits
- Biblatex -> citationsToBiblatex cits
- _ -> inlineListToLaTeX lst
-
-inlineToLaTeX (Code (_,classes,_) str) = do
- opts <- gets stOptions
- inHeading <- gets stInHeading
- case () of
- _ | writerListings opts && not inHeading -> listingsCode
- | isJust (writerHighlightStyle opts) && not (null classes)
- -> highlightCode
- | otherwise -> rawCode
- where listingsCode = do
- let listingsopt = case getListingsLanguage classes of
- Just l -> "[language=" ++ mbBraced l ++ "]"
- Nothing -> ""
- inNote <- gets stInNote
- when inNote $ modify $ \s -> s{ stVerbInNote = True }
- let chr = case "!\"&'()*,-./:;?@_" \\ str of
- (c:_) -> c
- [] -> '!'
- return $ text $ "\\lstinline" ++ listingsopt ++ [chr] ++ str ++ [chr]
- highlightCode = do
- case highlight formatLaTeXInline ("",classes,[]) str of
- Nothing -> rawCode
- Just h -> modify (\st -> st{ stHighlighting = True }) >>
- return (text (T.unpack h))
- rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}"))
- $ stringToLaTeX CodeString str
- where
- escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c])
-inlineToLaTeX (Quoted qt lst) = do
- contents <- inlineListToLaTeX lst
- csquotes <- liftM stCsquotes get
- opts <- gets stOptions
- if csquotes
- then return $ "\\enquote" <> braces contents
- else do
- let s1 = if (not (null lst)) && (isQuoted (head lst))
- then "\\,"
- else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
- then "\\,"
- else empty
- let inner = s1 <> contents <> s2
- return $ case qt of
- DoubleQuote ->
- if isEnabled Ext_smart opts
- then text "``" <> inner <> text "''"
- else char '\x201C' <> inner <> char '\x201D'
- SingleQuote ->
- if isEnabled Ext_smart opts
- then char '`' <> inner <> char '\''
- else char '\x2018' <> inner <> char '\x2019'
-inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str
-inlineToLaTeX (Math InlineMath str) =
- return $ "\\(" <> text str <> "\\)"
-inlineToLaTeX (Math DisplayMath str) =
- return $ "\\[" <> text str <> "\\]"
-inlineToLaTeX il@(RawInline f str)
- | f == Format "latex" || f == Format "tex"
- = return $ text str
- | otherwise = do
- report $ InlineNotRendered il
- return empty
-inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr
-inlineToLaTeX SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- case wrapText of
- WrapAuto -> return space
- WrapNone -> return space
- WrapPreserve -> return cr
-inlineToLaTeX Space = return space
-inlineToLaTeX (Link _ txt ('#':ident, _)) = do
- contents <- inlineListToLaTeX txt
- lab <- toLabel ident
- return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents
-inlineToLaTeX (Link _ txt (src, _)) =
- case txt of
- [Str x] | escapeURI x == src -> -- autolink
- do modify $ \s -> s{ stUrl = True }
- src' <- stringToLaTeX URLString (escapeURI src)
- return $ text $ "\\url{" ++ src' ++ "}"
- [Str x] | Just rest <- stripPrefix "mailto:" src,
- escapeURI x == rest -> -- email autolink
- do modify $ \s -> s{ stUrl = True }
- src' <- stringToLaTeX URLString (escapeURI src)
- contents <- inlineListToLaTeX txt
- return $ "\\href" <> braces (text src') <>
- braces ("\\nolinkurl" <> braces contents)
- _ -> do contents <- inlineListToLaTeX txt
- src' <- stringToLaTeX URLString (escapeURI src)
- return $ text ("\\href{" ++ src' ++ "}{") <>
- contents <> char '}'
-inlineToLaTeX (Image attr _ (source, _)) = do
- modify $ \s -> s{ stGraphics = True }
- opts <- gets stOptions
- let showDim dir = let d = text (show dir) <> "="
- in case (dimension dir attr) of
- Just (Pixel a) ->
- [d <> text (showInInch opts (Pixel a)) <> "in"]
- Just (Percent a) ->
- [d <> text (showFl (a / 100)) <> "\\textwidth"]
- Just dim ->
- [d <> text (show dim)]
- Nothing ->
- []
- dimList = showDim Width ++ showDim Height
- dims = if null dimList
- then empty
- else brackets $ cat (intersperse "," dimList)
- source' = if isURI source
- then source
- else unEscapeString source
- source'' <- stringToLaTeX URLString source'
- inHeading <- gets stInHeading
- return $
- (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <>
- dims <> braces (text source'')
-inlineToLaTeX (Note contents) = do
- inMinipage <- gets stInMinipage
- modify (\s -> s{stInNote = True})
- contents' <- blockListToLaTeX contents
- modify (\s -> s {stInNote = False})
- let optnl = case reverse contents of
- (CodeBlock _ _ : _) -> cr
- _ -> empty
- let noteContents = nest 2 contents' <> optnl
- beamer <- gets stBeamer
- -- in beamer slides, display footnote from current overlay forward
- let beamerMark = if beamer
- then text "<.->"
- else empty
- modify $ \st -> st{ stNotes = noteContents : stNotes st }
- return $
- if inMinipage
- then "\\footnotemark{}"
- -- note: a \n before } needed when note ends with a Verbatim environment
- else "\\footnote" <> beamerMark <> braces noteContents
-
-protectCode :: [Inline] -> [Inline]
-protectCode [] = []
-protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs
-protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs
- where ltx = RawInline (Format "latex")
-protectCode (x : xs) = x : protectCode xs
-
-citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc
-citationsToNatbib (one:[])
- = citeCommand c p s k
- where
- Citation { citationId = k
- , citationPrefix = p
- , citationSuffix = s
- , citationMode = m
- }
- = one
- c = case m of
- AuthorInText -> "citet"
- SuppressAuthor -> "citeyearpar"
- NormalCitation -> "citep"
-
-citationsToNatbib cits
- | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
- = citeCommand "citep" p s ks
- where
- noPrefix = all (null . citationPrefix)
- noSuffix = all (null . citationSuffix)
- ismode m = all (((==) m) . citationMode)
- p = citationPrefix $ head $ cits
- s = citationSuffix $ last $ cits
- ks = intercalate ", " $ map citationId cits
-
-citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
- author <- citeCommand "citeauthor" [] [] (citationId c)
- cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs)
- return $ author <+> cits
-
-citationsToNatbib cits = do
- cits' <- mapM convertOne cits
- return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
- where
- combineTwo a b | isEmpty a = b
- | otherwise = a <> text "; " <> b
- convertOne Citation { citationId = k
- , citationPrefix = p
- , citationSuffix = s
- , citationMode = m
- }
- = case m of
- AuthorInText -> citeCommand "citealt" p s k
- SuppressAuthor -> citeCommand "citeyear" p s k
- NormalCitation -> citeCommand "citealp" p s k
-
-citeCommand :: PandocMonad m
- => String -> [Inline] -> [Inline] -> String -> LW m Doc
-citeCommand c p s k = do
- args <- citeArguments p s k
- return $ text ("\\" ++ c) <> args
-
-citeArguments :: PandocMonad m
- => [Inline] -> [Inline] -> String -> LW m Doc
-citeArguments p s k = do
- let s' = case s of
- (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r
- (Str (x:xs) : r) | isPunctuation x -> Str xs : r
- _ -> s
- pdoc <- inlineListToLaTeX p
- sdoc <- inlineListToLaTeX s'
- let optargs = case (isEmpty pdoc, isEmpty sdoc) of
- (True, True ) -> empty
- (True, False) -> brackets sdoc
- (_ , _ ) -> brackets pdoc <> brackets sdoc
- return $ optargs <> braces (text k)
-
-citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc
-citationsToBiblatex (one:[])
- = citeCommand cmd p s k
- where
- Citation { citationId = k
- , citationPrefix = p
- , citationSuffix = s
- , citationMode = m
- } = one
- cmd = case m of
- SuppressAuthor -> "autocite*"
- AuthorInText -> "textcite"
- NormalCitation -> "autocite"
-
-citationsToBiblatex (c:cs) = do
- args <- mapM convertOne (c:cs)
- return $ text cmd <> foldl' (<>) empty args
- where
- cmd = case citationMode c of
- SuppressAuthor -> "\\autocites*"
- AuthorInText -> "\\textcites"
- NormalCitation -> "\\autocites"
- convertOne Citation { citationId = k
- , citationPrefix = p
- , citationSuffix = s
- }
- = citeArguments p s k
-
-citationsToBiblatex _ = return empty
-
--- Determine listings language from list of class attributes.
-getListingsLanguage :: [String] -> Maybe String
-getListingsLanguage [] = Nothing
-getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
-
-mbBraced :: String -> String
-mbBraced x = if not (all isAlphaNum x)
- then "{" <> x <> "}"
- else x
-
--- Extract a key from divs and spans
-extract :: String -> Block -> [String]
-extract key (Div attr _) = lookKey key attr
-extract key (Plain ils) = concatMap (extractInline key) ils
-extract key (Para ils) = concatMap (extractInline key) ils
-extract key (Header _ _ ils) = concatMap (extractInline key) ils
-extract _ _ = []
-
--- Extract a key from spans
-extractInline :: String -> Inline -> [String]
-extractInline key (Span attr _) = lookKey key attr
-extractInline _ _ = []
-
--- Look up a key in an attribute and give a list of its values
-lookKey :: String -> Attr -> [String]
-lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
-
--- In environments \Arabic instead of \arabic is used
-toPolyglossiaEnv :: String -> (String, String)
-toPolyglossiaEnv l =
- case toPolyglossia $ (splitBy (=='-')) l of
- ("arabic", o) -> ("Arabic", o)
- x -> x
-
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Polyglossia (language, options) tuple
--- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
-toPolyglossia :: [String] -> (String, String)
-toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria")
-toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya")
-toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco")
-toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania")
-toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia")
-toPolyglossia ("de":"1901":_) = ("german", "spelling=old")
-toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old")
-toPolyglossia ("de":"AT":_) = ("german", "variant=austrian")
-toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old")
-toPolyglossia ("de":"CH":_) = ("german", "variant=swiss")
-toPolyglossia ("de":_) = ("german", "")
-toPolyglossia ("dsb":_) = ("lsorbian", "")
-toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly")
-toPolyglossia ("en":"AU":_) = ("english", "variant=australian")
-toPolyglossia ("en":"CA":_) = ("english", "variant=canadian")
-toPolyglossia ("en":"GB":_) = ("english", "variant=british")
-toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand")
-toPolyglossia ("en":"UK":_) = ("english", "variant=british")
-toPolyglossia ("en":"US":_) = ("english", "variant=american")
-toPolyglossia ("grc":_) = ("greek", "variant=ancient")
-toPolyglossia ("hsb":_) = ("usorbian", "")
-toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic")
-toPolyglossia ("sl":_) = ("slovenian", "")
-toPolyglossia x = (commonFromBcp47 x, "")
-
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Babel language string.
--- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
--- List of supported languages (slightly outdated):
--- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
-toBabel :: [String] -> String
-toBabel ("de":"1901":_) = "german"
-toBabel ("de":"AT":"1901":_) = "austrian"
-toBabel ("de":"AT":_) = "naustrian"
-toBabel ("de":"CH":"1901":_) = "swissgerman"
-toBabel ("de":"CH":_) = "nswissgerman"
-toBabel ("de":_) = "ngerman"
-toBabel ("dsb":_) = "lowersorbian"
-toBabel ("el":"polyton":_) = "polutonikogreek"
-toBabel ("en":"AU":_) = "australian"
-toBabel ("en":"CA":_) = "canadian"
-toBabel ("en":"GB":_) = "british"
-toBabel ("en":"NZ":_) = "newzealand"
-toBabel ("en":"UK":_) = "british"
-toBabel ("en":"US":_) = "american"
-toBabel ("fr":"CA":_) = "canadien"
-toBabel ("fra":"aca":_) = "acadian"
-toBabel ("grc":_) = "polutonikogreek"
-toBabel ("hsb":_) = "uppersorbian"
-toBabel ("la":"x":"classic":_) = "classiclatin"
-toBabel ("sl":_) = "slovene"
-toBabel x = commonFromBcp47 x
-
--- Takes a list of the constituents of a BCP 47 language code
--- and converts it to a string shared by Babel and Polyglossia.
--- https://tools.ietf.org/html/bcp47#section-2.1
-commonFromBcp47 :: [String] -> String
-commonFromBcp47 [] = ""
-commonFromBcp47 ("pt":"BR":_) = "brazil"
--- Note: documentation says "brazilian" works too, but it doesn't seem to work
--- on some systems. See #2953.
-commonFromBcp47 ("sr":"Cyrl":_) = "serbianc"
-commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin"
-commonFromBcp47 x = fromIso $ head x
- where
- fromIso "af" = "afrikaans"
- fromIso "am" = "amharic"
- fromIso "ar" = "arabic"
- fromIso "as" = "assamese"
- fromIso "ast" = "asturian"
- fromIso "bg" = "bulgarian"
- fromIso "bn" = "bengali"
- fromIso "bo" = "tibetan"
- fromIso "br" = "breton"
- fromIso "ca" = "catalan"
- fromIso "cy" = "welsh"
- fromIso "cs" = "czech"
- fromIso "cop" = "coptic"
- fromIso "da" = "danish"
- fromIso "dv" = "divehi"
- fromIso "el" = "greek"
- fromIso "en" = "english"
- fromIso "eo" = "esperanto"
- fromIso "es" = "spanish"
- fromIso "et" = "estonian"
- fromIso "eu" = "basque"
- fromIso "fa" = "farsi"
- fromIso "fi" = "finnish"
- fromIso "fr" = "french"
- fromIso "fur" = "friulan"
- fromIso "ga" = "irish"
- fromIso "gd" = "scottish"
- fromIso "gez" = "ethiopic"
- fromIso "gl" = "galician"
- fromIso "he" = "hebrew"
- fromIso "hi" = "hindi"
- fromIso "hr" = "croatian"
- fromIso "hu" = "magyar"
- fromIso "hy" = "armenian"
- fromIso "ia" = "interlingua"
- fromIso "id" = "indonesian"
- fromIso "ie" = "interlingua"
- fromIso "is" = "icelandic"
- fromIso "it" = "italian"
- fromIso "jp" = "japanese"
- fromIso "km" = "khmer"
- fromIso "kmr" = "kurmanji"
- fromIso "kn" = "kannada"
- fromIso "ko" = "korean"
- fromIso "la" = "latin"
- fromIso "lo" = "lao"
- fromIso "lt" = "lithuanian"
- fromIso "lv" = "latvian"
- fromIso "ml" = "malayalam"
- fromIso "mn" = "mongolian"
- fromIso "mr" = "marathi"
- fromIso "nb" = "norsk"
- fromIso "nl" = "dutch"
- fromIso "nn" = "nynorsk"
- fromIso "no" = "norsk"
- fromIso "nqo" = "nko"
- fromIso "oc" = "occitan"
- fromIso "pa" = "panjabi"
- fromIso "pl" = "polish"
- fromIso "pms" = "piedmontese"
- fromIso "pt" = "portuguese"
- fromIso "rm" = "romansh"
- fromIso "ro" = "romanian"
- fromIso "ru" = "russian"
- fromIso "sa" = "sanskrit"
- fromIso "se" = "samin"
- fromIso "sk" = "slovak"
- fromIso "sq" = "albanian"
- fromIso "sr" = "serbian"
- fromIso "sv" = "swedish"
- fromIso "syr" = "syriac"
- fromIso "ta" = "tamil"
- fromIso "te" = "telugu"
- fromIso "th" = "thai"
- fromIso "ti" = "ethiopic"
- fromIso "tk" = "turkmen"
- fromIso "tr" = "turkish"
- fromIso "uk" = "ukrainian"
- fromIso "ur" = "urdu"
- fromIso "vi" = "vietnamese"
- fromIso _ = ""
-
-pDocumentOptions :: P.Parsec String () [String]
-pDocumentOptions = do
- P.char '['
- opts <- P.sepBy
- (P.many $ P.spaces *> P.noneOf (" ,]" :: String) <* P.spaces)
- (P.char ',')
- P.char ']'
- return opts
-
-pDocumentClass :: P.Parsec String () String
-pDocumentClass =
- do P.skipMany (P.satisfy (/='\\'))
- P.string "\\documentclass"
- classOptions <- pDocumentOptions <|> return []
- if ("article" :: String) `elem` classOptions
- then return "article"
- else do P.skipMany (P.satisfy (/='{'))
- P.char '{'
- P.manyTill P.letter (P.char '}')
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
deleted file mode 100644
index f33acef32..000000000
--- a/src/Text/Pandoc/Writers/Man.hs
+++ /dev/null
@@ -1,381 +0,0 @@
-{-
-Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Man
- Copyright : Copyright (C) 2007-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to groff man page format.
-
--}
-module Text.Pandoc.Writers.Man ( writeMan) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Templates
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Writers.Math
-import Text.Printf ( printf )
-import Data.List ( stripPrefix, intersperse, intercalate )
-import Data.Maybe (fromMaybe)
-import Text.Pandoc.Pretty
-import Text.Pandoc.Builder (deleteMeta)
-import Control.Monad.State
-import Text.Pandoc.Error
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
-type Notes = [[Block]]
-data WriterState = WriterState { stNotes :: Notes
- , stHasTables :: Bool }
-
--- | Convert Pandoc to Man.
-writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeMan opts document = evalStateT (pandocToMan opts document) (WriterState [] False)
-
--- | Return groff man representation of document.
-pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m String
-pandocToMan opts (Pandoc meta blocks) = do
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- let render' = render colwidth
- titleText <- inlineListToMan opts $ docTitle meta
- let title' = render' titleText
- let setFieldsFromTitle =
- case break (== ' ') title' of
- (cmdName, rest) -> case break (=='(') cmdName of
- (xs, '(':ys) | not (null ys) &&
- last ys == ')' ->
- defField "title" xs .
- defField "section" (init ys) .
- case splitBy (=='|') rest of
- (ft:hds) ->
- defField "footer" (trim ft) .
- defField "header"
- (trim $ concat hds)
- [] -> id
- _ -> defField "title" title'
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToMan opts)
- (fmap (render colwidth) . inlineListToMan opts)
- $ deleteMeta "title" meta
- body <- blockListToMan opts blocks
- notes <- liftM stNotes get
- notes' <- notesToMan opts (reverse notes)
- let main = render' $ body $$ notes' $$ text ""
- hasTables <- liftM stHasTables get
- let context = defField "body" main
- $ setFieldsFromTitle
- $ defField "has-tables" hasTables
- $ defField "hyphenate" True
- $ defField "pandoc-version" pandocVersion
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Return man representation of notes.
-notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc
-notesToMan opts notes =
- if null notes
- then return empty
- else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
- return . (text ".SH NOTES" $$) . vcat
-
--- | Return man representation of a note.
-noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc
-noteToMan opts num note = do
- contents <- blockListToMan opts note
- let marker = cr <> text ".SS " <> brackets (text (show num))
- return $ marker $$ contents
-
--- | Association list of characters to escape.
-manEscapes :: [(Char, String)]
-manEscapes = [ ('\160', "\\ ")
- , ('\'', "\\[aq]")
- , ('’', "'")
- , ('\x2014', "\\[em]")
- , ('\x2013', "\\[en]")
- , ('\x2026', "\\&...")
- ] ++ backslashEscapes "-@\\"
-
--- | Escape special characters for Man.
-escapeString :: String -> String
-escapeString = escapeStringUsing manEscapes
-
--- | Escape a literal (code) section for Man.
-escapeCode :: String -> String
-escapeCode = concat . intersperse "\n" . map escapeLine . lines where
- escapeLine codeline =
- case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of
- a@('.':_) -> "\\&" ++ a
- b -> b
-
--- We split inline lists into sentences, and print one sentence per
--- line. groff/troff treats the line-ending period differently.
--- See http://code.google.com/p/pandoc/issues/detail?id=148.
-
--- | Returns the first sentence in a list of inlines, and the rest.
-breakSentence :: [Inline] -> ([Inline], [Inline])
-breakSentence [] = ([],[])
-breakSentence xs =
- let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
- isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
- isSentenceEndInline (LineBreak) = True
- isSentenceEndInline _ = False
- (as, bs) = break isSentenceEndInline xs
- in case bs of
- [] -> (as, [])
- [c] -> (as ++ [c], [])
- (c:Space:cs) -> (as ++ [c], cs)
- (c:SoftBreak:cs) -> (as ++ [c], cs)
- (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
- (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
- (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
- (c:cs) -> (as ++ [c] ++ ds, es)
- where (ds, es) = breakSentence cs
-
--- | Split a list of inlines into sentences.
-splitSentences :: [Inline] -> [[Inline]]
-splitSentences xs =
- let (sent, rest) = breakSentence xs
- in if null rest then [sent] else sent : splitSentences rest
-
--- | Convert Pandoc block element to man.
-blockToMan :: PandocMonad m
- => WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> StateT WriterState m Doc
-blockToMan _ Null = return empty
-blockToMan opts (Div _ bs) = blockListToMan opts bs
-blockToMan opts (Plain inlines) =
- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
-blockToMan opts (Para inlines) = do
- contents <- liftM vcat $ mapM (inlineListToMan opts) $
- splitSentences inlines
- return $ text ".PP" $$ contents
-blockToMan opts (LineBlock lns) =
- blockToMan opts $ linesToPara lns
-blockToMan _ b@(RawBlock f str)
- | f == Format "man" = return $ text str
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
-blockToMan opts (Header level _ inlines) = do
- contents <- inlineListToMan opts inlines
- let heading = case level of
- 1 -> ".SH "
- _ -> ".SS "
- return $ text heading <> contents
-blockToMan _ (CodeBlock _ str) = return $
- text ".IP" $$
- text ".nf" $$
- text "\\f[C]" $$
- text (escapeCode str) $$
- text "\\f[]" $$
- text ".fi"
-blockToMan opts (BlockQuote blocks) = do
- contents <- blockListToMan opts blocks
- return $ text ".RS" $$ contents $$ text ".RE"
-blockToMan opts (Table caption alignments widths headers rows) =
- let aligncode AlignLeft = "l"
- aligncode AlignRight = "r"
- aligncode AlignCenter = "c"
- aligncode AlignDefault = "l"
- in do
- caption' <- inlineListToMan opts caption
- modify $ \st -> st{ stHasTables = True }
- let iwidths = if all (== 0) widths
- then repeat ""
- else map (printf "w(%0.1fn)" . (70 *)) widths
- -- 78n default width - 8n indent = 70n
- let coldescriptions = text $ intercalate " "
- (zipWith (\align width -> aligncode align ++ width)
- alignments iwidths) ++ "."
- colheadings <- mapM (blockListToMan opts) headers
- let makeRow cols = text "T{" $$
- (vcat $ intersperse (text "T}@T{") cols) $$
- text "T}"
- let colheadings' = if all null headers
- then empty
- else makeRow colheadings $$ char '_'
- body <- mapM (\row -> do
- cols <- mapM (blockListToMan opts) row
- return $ makeRow cols) rows
- return $ text ".PP" $$ caption' $$
- text ".TS" $$ text "tab(@);" $$ coldescriptions $$
- colheadings' $$ vcat body $$ text ".TE"
-
-blockToMan opts (BulletList items) = do
- contents <- mapM (bulletListItemToMan opts) items
- return (vcat contents)
-blockToMan opts (OrderedList attribs items) = do
- let markers = take (length items) $ orderedListMarkers attribs
- let indent = 1 + (maximum $ map length markers)
- contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
- zip markers items
- return (vcat contents)
-blockToMan opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMan opts) items
- return (vcat contents)
-
--- | Convert bullet list item (list of blocks) to man.
-bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc
-bulletListItemToMan _ [] = return empty
-bulletListItemToMan opts ((Para first):rest) =
- bulletListItemToMan opts ((Plain first):rest)
-bulletListItemToMan opts ((Plain first):rest) = do
- first' <- blockToMan opts (Plain first)
- rest' <- blockListToMan opts rest
- let first'' = text ".IP \\[bu] 2" $$ first'
- let rest'' = if null rest
- then empty
- else text ".RS 2" $$ rest' $$ text ".RE"
- return (first'' $$ rest'')
-bulletListItemToMan opts (first:rest) = do
- first' <- blockToMan opts first
- rest' <- blockListToMan opts rest
- return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
-
--- | Convert ordered list item (a list of blocks) to man.
-orderedListItemToMan :: PandocMonad m
- => WriterOptions -- ^ options
- -> String -- ^ order marker for list item
- -> Int -- ^ number of spaces to indent
- -> [Block] -- ^ list item (list of blocks)
- -> StateT WriterState m Doc
-orderedListItemToMan _ _ _ [] = return empty
-orderedListItemToMan opts num indent ((Para first):rest) =
- orderedListItemToMan opts num indent ((Plain first):rest)
-orderedListItemToMan opts num indent (first:rest) = do
- first' <- blockToMan opts first
- rest' <- blockListToMan opts rest
- let num' = printf ("%" ++ show (indent - 1) ++ "s") num
- let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
- let rest'' = if null rest
- then empty
- else text ".RS 4" $$ rest' $$ text ".RE"
- return $ first'' $$ rest''
-
--- | Convert definition list item (label, list of blocks) to man.
-definitionListItemToMan :: PandocMonad m
- => WriterOptions
- -> ([Inline],[[Block]])
- -> StateT WriterState m Doc
-definitionListItemToMan opts (label, defs) = do
- labelText <- inlineListToMan opts label
- contents <- if null defs
- then return empty
- else liftM vcat $ forM defs $ \blocks -> do
- (first, rest) <- case blocks of
- ((Para x):y) -> return (Plain x,y)
- (x:y) -> return (x,y)
- [] -> throwError $ PandocSomeError "blocks is null"
- rest' <- liftM vcat $
- mapM (\item -> blockToMan opts item) rest
- first' <- blockToMan opts first
- return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
- return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents
-
--- | Convert list of Pandoc block elements to man.
-blockListToMan :: PandocMonad m
- => WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> StateT WriterState m Doc
-blockListToMan opts blocks =
- mapM (blockToMan opts) blocks >>= (return . vcat)
-
--- | Convert list of Pandoc inline elements to man.
-inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc
-inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
-
--- | Convert Pandoc inline element to man.
-inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc
-inlineToMan opts (Span _ ils) = inlineListToMan opts ils
-inlineToMan opts (Emph lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[I]" <> contents <> text "\\f[]"
-inlineToMan opts (Strong lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\f[B]" <> contents <> text "\\f[]"
-inlineToMan opts (Strikeout lst) = do
- contents <- inlineListToMan opts lst
- return $ text "[STRIKEOUT:" <> contents <> char ']'
-inlineToMan opts (Superscript lst) = do
- contents <- inlineListToMan opts lst
- return $ char '^' <> contents <> char '^'
-inlineToMan opts (Subscript lst) = do
- contents <- inlineListToMan opts lst
- return $ char '~' <> contents <> char '~'
-inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported
-inlineToMan opts (Quoted SingleQuote lst) = do
- contents <- inlineListToMan opts lst
- return $ char '`' <> contents <> char '\''
-inlineToMan opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToMan opts lst
- return $ text "\\[lq]" <> contents <> text "\\[rq]"
-inlineToMan opts (Cite _ lst) =
- inlineListToMan opts lst
-inlineToMan _ (Code _ str) =
- return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
-inlineToMan _ (Str str@('.':_)) =
- return $ afterBreak "\\&" <> text (escapeString str)
-inlineToMan _ (Str str) = return $ text $ escapeString str
-inlineToMan opts (Math InlineMath str) =
- lift (texMathToInlines InlineMath str) >>= inlineListToMan opts
-inlineToMan opts (Math DisplayMath str) = do
- contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts
- return $ cr <> text ".RS" $$ contents $$ text ".RE"
-inlineToMan _ il@(RawInline f str)
- | f == Format "man" = return $ text str
- | otherwise = do
- report $ InlineNotRendered il
- return empty
-inlineToMan _ LineBreak = return $
- cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
-inlineToMan _ SoftBreak = return space
-inlineToMan _ Space = return space
-inlineToMan opts (Link _ txt (src, _)) = do
- linktext <- inlineListToMan opts txt
- let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
- return $ case txt of
- [Str s]
- | escapeURI s == srcSuffix ->
- char '<' <> text srcSuffix <> char '>'
- _ -> linktext <> text " (" <> text src <> char ')'
-inlineToMan opts (Image attr alternate (source, tit)) = do
- let txt = if (null alternate) || (alternate == [Str ""]) ||
- (alternate == [Str source]) -- to prevent autolinks
- then [Str "image"]
- else alternate
- linkPart <- inlineToMan opts (Link attr txt (source, tit))
- return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
-inlineToMan _ (Note contents) = do
- -- add to notes in state
- modify $ \st -> st{ stNotes = contents : stNotes st }
- notes <- liftM stNotes get
- let ref = show $ (length notes)
- return $ char '[' <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
deleted file mode 100644
index a97c32542..000000000
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ /dev/null
@@ -1,1147 +0,0 @@
-{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Markdown
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to markdown-formatted plain text.
-
-Markdown: <http://daringfireball.net/projects/markdown/>
--}
-module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
-import Data.Maybe (fromMaybe)
-import Data.Monoid (Any(..))
-import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy )
-import Data.Char ( isSpace, isPunctuation, ord, chr )
-import Data.Ord ( comparing )
-import Text.Pandoc.Pretty
-import Control.Monad.Reader
-import Control.Monad.State
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Writers.Math (texMathToInlines)
-import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
-import Network.URI (isURI)
-import Data.Default
-import Data.Yaml (Value(Object,String,Array,Bool,Number))
-import qualified Data.HashMap.Strict as H
-import qualified Data.Vector as V
-import qualified Data.Text as T
-import qualified Data.Set as Set
-import Network.HTTP ( urlEncode )
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
-type Notes = [[Block]]
-type Ref = ([Inline], Target, Attr)
-type Refs = [Ref]
-
-type MD m = ReaderT WriterEnv (StateT WriterState m)
-
-evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
-evalMD md env st = evalStateT (runReaderT md env) st
-
-data WriterEnv = WriterEnv { envInList :: Bool
- , envPlain :: Bool
- , envRefShortcutable :: Bool
- , envBlockLevel :: Int
- , envEscapeSpaces :: Bool
- }
-
-instance Default WriterEnv
- where def = WriterEnv { envInList = False
- , envPlain = False
- , envRefShortcutable = True
- , envBlockLevel = 0
- , envEscapeSpaces = False
- }
-
-data WriterState = WriterState { stNotes :: Notes
- , stRefs :: Refs
- , stIds :: Set.Set String
- , stNoteNum :: Int
- }
-
-instance Default WriterState
- where def = WriterState{ stNotes = []
- , stRefs = []
- , stIds = Set.empty
- , stNoteNum = 1
- }
-
--- | Convert Pandoc to Markdown.
-writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeMarkdown opts document =
- evalMD (pandocToMarkdown opts{
- writerWrapText = if isEnabled Ext_hard_line_breaks opts
- then WrapNone
- else writerWrapText opts }
- document) def def
-
--- | Convert Pandoc to plain text (like markdown, but without links,
--- pictures, or inline formatting).
-writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writePlain opts document =
- evalMD (pandocToMarkdown opts document) def{ envPlain = True } def
-
-pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc
-pandocTitleBlock tit auths dat =
- hang 2 (text "% ") tit <> cr <>
- hang 2 (text "% ") (vcat $ map nowrap auths) <> cr <>
- hang 2 (text "% ") dat <> cr
-
-mmdTitleBlock :: Value -> Doc
-mmdTitleBlock (Object hashmap) =
- vcat $ map go $ sortBy (comparing fst) $ H.toList hashmap
- where go (k,v) =
- case (text (T.unpack k), v) of
- (k', Array vec)
- | V.null vec -> empty
- | otherwise -> k' <> ":" <> space <>
- hcat (intersperse "; "
- (map fromstr $ V.toList vec))
- (_, String "") -> empty
- (k', x) -> k' <> ":" <> space <> nest 2 (fromstr x)
- fromstr (String s) = text (removeBlankLines $ T.unpack s)
- fromstr (Bool b) = text (show b)
- fromstr (Number n) = text (show n)
- fromstr _ = empty
- -- blank lines not allowed in MMD metadata - we replace with .
- removeBlankLines = trimr . unlines . map (\x ->
- if all isSpace x then "." else x) . lines
-mmdTitleBlock _ = empty
-
-plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc
-plainTitleBlock tit auths dat =
- tit <> cr <>
- (hcat (intersperse (text "; ") auths)) <> cr <>
- dat <> cr
-
-yamlMetadataBlock :: Value -> Doc
-yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "---"
-
-jsonToYaml :: Value -> Doc
-jsonToYaml (Object hashmap) =
- vcat $ map (\(k,v) ->
- case (text (T.unpack k), v, jsonToYaml v) of
- (k', Array vec, x)
- | V.null vec -> empty
- | otherwise -> (k' <> ":") $$ x
- (k', Object _, x) -> (k' <> ":") $$ nest 2 x
- (_, String "", _) -> empty
- (k', _, x) | k == "meta-json" -> empty
- | otherwise -> k' <> ":" <> space <> hang 2 "" x)
- $ sortBy (comparing fst) $ H.toList hashmap
-jsonToYaml (Array vec) =
- vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec
-jsonToYaml (String "") = empty
-jsonToYaml (String s) =
- case T.unpack s of
- x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x
- | not (any isPunctuation x) -> text x
- | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'"
-jsonToYaml (Bool b) = text $ show b
-jsonToYaml (Number n) = text $ show n
-jsonToYaml _ = empty
-
--- | Return markdown representation of document.
-pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String
-pandocToMarkdown opts (Pandoc meta blocks) = do
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- isPlain <- asks envPlain
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToMarkdown opts)
- (fmap (render colwidth) . inlineListToMarkdown opts)
- meta
- let title' = maybe empty text $ getField "title" metadata
- let authors' = maybe [] (map text) $ getField "author" metadata
- let date' = maybe empty text $ getField "date" metadata
- let titleblock = case writerTemplate opts of
- Just _ | isPlain ->
- plainTitleBlock title' authors' date'
- | isEnabled Ext_yaml_metadata_block opts ->
- yamlMetadataBlock metadata
- | isEnabled Ext_pandoc_title_block opts ->
- pandocTitleBlock title' authors' date'
- | isEnabled Ext_mmd_title_block opts ->
- mmdTitleBlock metadata
- | otherwise -> empty
- Nothing -> empty
- let headerBlocks = filter isHeaderBlock blocks
- toc <- if writerTableOfContents opts
- then tableOfContents opts headerBlocks
- else return empty
- -- Strip off final 'references' header if markdown citations enabled
- let blocks' = if isEnabled Ext_citations opts
- then case reverse blocks of
- (Div (_,["references"],_) _):xs -> reverse xs
- _ -> blocks
- else blocks
- body <- blockListToMarkdown opts blocks'
- notesAndRefs' <- notesAndRefs opts
- let render' :: Doc -> String
- render' = render colwidth
- let main = render' $ body <> notesAndRefs'
- let context = defField "toc" (render' toc)
- $ defField "body" main
- $ (if isNullMeta meta
- then id
- else defField "titleblock" (render' titleblock))
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Return markdown representation of reference key table.
-refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc
-refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-
--- | Return markdown representation of a reference key.
-keyToMarkdown :: PandocMonad m
- => WriterOptions
- -> Ref
- -> MD m Doc
-keyToMarkdown opts (label, (src, tit), attr) = do
- label' <- inlineListToMarkdown opts label
- let tit' = if null tit
- then empty
- else space <> "\"" <> text tit <> "\""
- return $ nest 2 $ hang 2
- ("[" <> label' <> "]:" <> space) (text src <> tit')
- <> linkAttributes opts attr
-
--- | Return markdown representation of notes.
-notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc
-notesToMarkdown opts notes = do
- n <- gets stNoteNum
- notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes)
- modify $ \st -> st { stNoteNum = stNoteNum st + length notes }
- return $ vsep notes'
-
--- | Return markdown representation of a note.
-noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc
-noteToMarkdown opts num blocks = do
- contents <- blockListToMarkdown opts blocks
- let num' = text $ writerIdentifierPrefix opts ++ show num
- let marker = if isEnabled Ext_footnotes opts
- then text "[^" <> num' <> text "]:"
- else text "[" <> num' <> text "]"
- let markerSize = 4 + offset num'
- let spacer = case writerTabStop opts - markerSize of
- n | n > 0 -> text $ replicate n ' '
- _ -> text " "
- return $ if isEnabled Ext_footnotes opts
- then hang (writerTabStop opts) (marker <> spacer) contents
- else marker <> spacer <> contents
-
--- | Escape special characters for Markdown.
-escapeString :: WriterOptions -> String -> String
-escapeString _ [] = []
-escapeString opts (c:cs) =
- case c of
- '<' -> "&lt;" ++ escapeString opts cs
- '>' -> "&gt;" ++ escapeString opts cs
- _ | c `elem` ['\\','`','*','_','[',']','#'] ->
- '\\':c:escapeString opts cs
- '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs
- '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs
- '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs
- '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
- '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
- '-' | isEnabled Ext_smart opts ->
- case cs of
- '-':_ -> '\\':'-':escapeString opts cs
- _ -> '-':escapeString opts cs
- '.' | isEnabled Ext_smart opts ->
- case cs of
- '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest
- _ -> '.':escapeString opts cs
- _ -> c : escapeString opts cs
-
--- | Construct table of contents from list of header blocks.
-tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> m Doc
-tableOfContents opts headers =
- let contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers
- in evalMD (blockToMarkdown opts contents) def def
-
--- | Converts an Element to a list item for a table of contents,
-elementToListItem :: WriterOptions -> Element -> [Block]
-elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs)
- = Plain headerLink :
- [ BulletList (map (elementToListItem opts) subsecs) |
- not (null subsecs) && lev < writerTOCDepth opts ]
- where headerLink = if null ident
- then walk deNote headerText
- else [Link nullAttr (walk deNote headerText)
- ('#':ident, "")]
-elementToListItem _ (Blk _) = []
-
-attrsToMarkdown :: Attr -> Doc
-attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
- where attribId = case attribs of
- ([],_,_) -> empty
- (i,_,_) -> "#" <> text i
- attribClasses = case attribs of
- (_,[],_) -> empty
- (_,cs,_) -> hsep $
- map (text . ('.':))
- cs
- attribKeys = case attribs of
- (_,_,[]) -> empty
- (_,_,ks) -> hsep $
- map (\(k,v) -> text k
- <> "=\"" <> text v <> "\"") ks
-
-linkAttributes :: WriterOptions -> Attr -> Doc
-linkAttributes opts attr =
- if isEnabled Ext_link_attributes opts && attr /= nullAttr
- then attrsToMarkdown attr
- else empty
-
--- | Ordered list start parser for use in Para below.
-olMarker :: Parser [Char] ParserState Char
-olMarker = do (start, style', delim) <- anyOrderedListMarker
- if delim == Period &&
- (style' == UpperAlpha || (style' == UpperRoman &&
- start `elem` [1, 5, 10, 50, 100, 500, 1000]))
- then spaceChar >> spaceChar
- else spaceChar
-
--- | True if string begins with an ordered list marker
-beginsWithOrderedListMarker :: String -> Bool
-beginsWithOrderedListMarker str =
- case runParser olMarker defaultParserState "para start" (take 10 str) of
- Left _ -> False
- Right _ -> True
-
-notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc
-notesAndRefs opts = do
- notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts
- modify $ \s -> s { stNotes = [] }
- refs' <- reverse <$> gets stRefs >>= refsToMarkdown opts
- modify $ \s -> s { stRefs = [] }
-
- let endSpacing =
- if | writerReferenceLocation opts == EndOfDocument -> empty
- | isEmpty notes' && isEmpty refs' -> empty
- | otherwise -> blankline
-
- return $
- (if isEmpty notes' then empty else blankline <> notes') <>
- (if isEmpty refs' then empty else blankline <> refs') <>
- endSpacing
-
--- | Convert Pandoc block element to markdown.
-blockToMarkdown :: PandocMonad m
- => WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> MD m Doc
-blockToMarkdown opts blk =
- local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $
- do doc <- blockToMarkdown' opts blk
- blkLevel <- asks envBlockLevel
- if writerReferenceLocation opts == EndOfBlock && blkLevel == 1
- then notesAndRefs opts >>= (\d -> return $ doc <> d)
- else return doc
-
-blockToMarkdown' :: PandocMonad m
- => WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> MD m Doc
-blockToMarkdown' _ Null = return empty
-blockToMarkdown' opts (Div attrs ils) = do
- contents <- blockListToMarkdown opts ils
- return $ if isEnabled Ext_raw_html opts &&
- isEnabled Ext_markdown_in_html_blocks opts
- then tagWithAttrs "div" attrs <> blankline <>
- contents <> blankline <> "</div>" <> blankline
- else contents <> blankline
-blockToMarkdown' opts (Plain inlines) = do
- contents <- inlineListToMarkdown opts inlines
- -- escape if para starts with ordered list marker
- isPlain <- asks envPlain
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- let rendered = render colwidth contents
- let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs
- | otherwise = x : escapeDelimiter xs
- escapeDelimiter [] = []
- let contents' = if isEnabled Ext_all_symbols_escapable opts &&
- not isPlain && beginsWithOrderedListMarker rendered
- then text $ escapeDelimiter rendered
- else contents
- return $ contents' <> cr
--- title beginning with fig: indicates figure
-blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
- blockToMarkdown opts (Para [Image attr alt (src,tit)])
-blockToMarkdown' opts (Para inlines) =
- (<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
-blockToMarkdown' opts (LineBlock lns) =
- if isEnabled Ext_line_blocks opts
- then do
- mdLines <- mapM (inlineListToMarkdown opts) lns
- return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline
- else blockToMarkdown opts $ linesToPara lns
-blockToMarkdown' opts b@(RawBlock f str)
- | f == "markdown" = return $ text str <> text "\n"
- | f == "html" && isEnabled Ext_raw_html opts = do
- plain <- asks envPlain
- return $ if plain
- then empty
- else if isEnabled Ext_markdown_attribute opts
- then text (addMarkdownAttribute str) <> text "\n"
- else text str <> text "\n"
- | f `elem` ["latex", "tex"] && isEnabled Ext_raw_tex opts = do
- plain <- asks envPlain
- return $ if plain
- then empty
- else text str <> text "\n"
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-blockToMarkdown' opts HorizontalRule = do
- return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline
-blockToMarkdown' opts (Header level attr inlines) = do
- -- first, if we're putting references at the end of a section, we
- -- put them here.
- blkLevel <- asks envBlockLevel
- refs <- if writerReferenceLocation opts == EndOfSection && blkLevel == 1
- then notesAndRefs opts
- else return empty
-
- plain <- asks envPlain
- -- we calculate the id that would be used by auto_identifiers
- -- so we know whether to print an explicit identifier
- ids <- gets stIds
- let autoId = uniqueIdent inlines ids
- modify $ \st -> st{ stIds = Set.insert autoId ids }
- let attr' = case attr of
- ("",[],[]) -> empty
- (id',[],[]) | isEnabled Ext_auto_identifiers opts
- && id' == autoId -> empty
- (id',_,_) | isEnabled Ext_mmd_header_identifiers opts ->
- space <> brackets (text id')
- _ | isEnabled Ext_header_attributes opts ->
- space <> attrsToMarkdown attr
- | otherwise -> empty
- contents <- inlineListToMarkdown opts $
- if level == 1 && plain
- then capitalize inlines
- else inlines
- let setext = writerSetextHeaders opts
- hdr = nowrap $ case level of
- 1 | plain -> blanklines 3 <> contents <> blanklines 2
- | setext ->
- contents <> attr' <> cr <> text (replicate (offset contents) '=') <>
- blankline
- 2 | plain -> blanklines 2 <> contents <> blankline
- | setext ->
- contents <> attr' <> cr <> text (replicate (offset contents) '-') <>
- blankline
- -- ghc interprets '#' characters in column 1 as linenum specifiers.
- _ | plain || isEnabled Ext_literate_haskell opts ->
- contents <> blankline
- _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline
-
- return $ refs <> hdr
-blockToMarkdown' opts (CodeBlock (_,classes,_) str)
- | "haskell" `elem` classes && "literate" `elem` classes &&
- isEnabled Ext_literate_haskell opts =
- return $ prefixed "> " (text str) <> blankline
-blockToMarkdown' opts (CodeBlock attribs str) = return $
- case attribs == nullAttr of
- False | isEnabled Ext_backtick_code_blocks opts ->
- backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline
- | isEnabled Ext_fenced_code_blocks opts ->
- tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline
- _ -> nest (writerTabStop opts) (text str) <> blankline
- where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of
- [] -> "~~~~"
- xs -> case maximum $ map length xs of
- n | n < 3 -> "~~~~"
- | otherwise -> replicate (n+1) '~'
- backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of
- [] -> "```"
- xs -> case maximum $ map length xs of
- n | n < 3 -> "```"
- | otherwise -> replicate (n+1) '`'
- attrs = if isEnabled Ext_fenced_code_attributes opts
- then nowrap $ " " <> attrsToMarkdown attribs
- else case attribs of
- (_,(cls:_),_) -> " " <> text cls
- _ -> empty
-blockToMarkdown' opts (BlockQuote blocks) = do
- plain <- asks envPlain
- -- if we're writing literate haskell, put a space before the bird tracks
- -- so they won't be interpreted as lhs...
- let leader = if isEnabled Ext_literate_haskell opts
- then " > "
- else if plain then " " else "> "
- contents <- blockListToMarkdown opts blocks
- return $ (prefixed leader contents) <> blankline
-blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
- caption' <- inlineListToMarkdown opts caption
- let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
- then empty
- else blankline <> ": " <> caption' <> blankline
- rawHeaders <- mapM (blockListToMarkdown opts) headers
- rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
- let isLineBreak LineBreak = Any True
- isLineBreak _ = Any False
- let isSimple = all (==0) widths &&
- not ( getAny (query isLineBreak (headers:rows)) )
- let isPlainBlock (Plain _) = True
- isPlainBlock _ = False
- let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
- (nst,tbl) <- case True of
- _ | isSimple &&
- isEnabled Ext_simple_tables opts -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isSimple &&
- isEnabled Ext_pipe_tables opts -> fmap (id,) $
- pipeTable (all null headers) aligns rawHeaders rawRows
- | not hasBlocks &&
- isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $
- pandocTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isEnabled Ext_grid_tables opts -> fmap (id,) $
- gridTable opts (all null headers) aligns widths
- rawHeaders rawRows
- | isEnabled Ext_raw_html opts -> fmap (id,) $
- text <$>
- (writeHtml5String def $ Pandoc nullMeta [t])
- | otherwise -> return $ (id, text "[TABLE]")
- return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
-blockToMarkdown' opts (BulletList items) = do
- contents <- inList $ mapM (bulletListItemToMarkdown opts) items
- return $ cat contents <> blankline
-blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do
- let start' = if isEnabled Ext_startnum opts then start else 1
- let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle
- let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim
- let attribs = (start', sty', delim')
- let markers = orderedListMarkers attribs
- let markers' = map (\m -> if length m < 3
- then m ++ replicate (3 - length m) ' '
- else m) markers
- contents <- inList $
- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
- zip markers' items
- return $ cat contents <> blankline
-blockToMarkdown' opts (DefinitionList items) = do
- contents <- inList $ mapM (definitionListItemToMarkdown opts) items
- return $ cat contents <> blankline
-
-inList :: Monad m => MD m a -> MD m a
-inList p = local (\env -> env {envInList = True}) p
-
-addMarkdownAttribute :: String -> String
-addMarkdownAttribute s =
- case span isTagText $ reverse $ parseTags s of
- (xs,(TagOpen t attrs:rest)) ->
- renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs)
- where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs,
- x /= "markdown"]
- _ -> s
-
-pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc
-pipeTable headless aligns rawHeaders rawRows = do
- let sp = text " "
- let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
- blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty
- blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty
- blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
- let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
- let torow cs = nowrap $ text "|" <>
- hcat (intersperse (text "|") $
- zipWith3 blockFor aligns widths (map chomp cs))
- <> text "|"
- let toborder (a, w) = text $ case a of
- AlignLeft -> ':':replicate (w + 1) '-'
- AlignCenter -> ':':replicate w '-' ++ ":"
- AlignRight -> replicate (w + 1) '-' ++ ":"
- AlignDefault -> replicate (w + 2) '-'
- -- note: pipe tables can't completely lack a
- -- header; for a headerless table, we need a header of empty cells.
- -- see jgm/pandoc#1996.
- let header = if headless
- then torow (replicate (length aligns) empty)
- else torow rawHeaders
- let border = nowrap $ text "|" <> hcat (intersperse (text "|") $
- map toborder $ zip aligns widths) <> text "|"
- let body = vcat $ map torow rawRows
- return $ header $$ border $$ body
-
-pandocTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> MD m Doc
-pandocTable opts headless aligns widths rawHeaders rawRows = do
- let isSimple = all (==0) widths
- let alignHeader alignment = case alignment of
- AlignLeft -> lblock
- AlignCenter -> cblock
- AlignRight -> rblock
- AlignDefault -> lblock
- -- Number of characters per column necessary to output every cell
- -- without requiring a line break.
- -- The @+2@ is needed for specifying the alignment.
- let numChars = (+ 2) . maximum . map offset
- -- Number of characters per column necessary to output every cell
- -- without requiring a line break *inside a word*.
- -- The @+2@ is needed for specifying the alignment.
- let minNumChars = (+ 2) . maximum . map minOffset
- let columns = transpose (rawHeaders : rawRows)
- -- minimal column width without wrapping a single word
- let noWordWrapWidth
- | writerWrapText opts == WrapAuto
- = fromIntegral $ maximum (map minNumChars columns)
- | otherwise = fromIntegral $ maximum (map numChars columns)
- let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w)
- (noWordWrapWidth * w / minimum widths)
- let widthsInChars
- | isSimple = map numChars columns
- | otherwise = map relWidth widths
- let makeRow = hcat . intersperse (lblock 1 (text " ")) .
- (zipWith3 alignHeader aligns widthsInChars)
- let rows' = map makeRow rawRows
- let head' = makeRow rawHeaders
- let maxRowHeight = maximum $ map height (head':rows')
- let underline = cat $ intersperse (text " ") $
- map (\width -> text (replicate width '-')) widthsInChars
- let border = if maxRowHeight > 1
- then text (replicate (sum widthsInChars +
- length widthsInChars - 1) '-')
- else if headless
- then underline
- else empty
- let head'' = if headless
- then empty
- else border <> cr <> head'
- let body = if maxRowHeight > 1
- then vsep rows'
- else vcat rows'
- let bottom = if headless
- then underline
- else border
- return $ head'' $$ underline $$ body $$ bottom
-
-gridTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double]
- -> [Doc] -> [[Doc]] -> MD m Doc
-gridTable opts headless aligns widths headers' rawRows = do
- let numcols = length headers'
- let widths' = if all (==0) widths
- then replicate numcols (1.0 / fromIntegral numcols)
- else widths
- let widthsInChars = map
- ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths'
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = maximum (1 : map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = chomp $ hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map (makeRow . map chomp) rawRows
- let borderpart ch align widthInChars =
- let widthInChars' = if widthInChars < 1 then 1 else widthInChars
- in (if (align == AlignLeft || align == AlignCenter)
- then char ':'
- else char ch) <>
- text (replicate widthInChars' ch) <>
- (if (align == AlignRight || align == AlignCenter)
- then char ':'
- else char ch)
- let border ch aligns' widthsInChars' =
- char '+' <>
- hcat (intersperse (char '+') (zipWith (borderpart ch)
- aligns' widthsInChars')) <> char '+'
- let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars)
- rows'
- let head'' = if headless
- then empty
- else head' $$ border '=' aligns widthsInChars
- if headless
- then return $
- border '-' aligns widthsInChars $$
- body $$
- border '-' (repeat AlignDefault) widthsInChars
- else return $
- border '-' (repeat AlignDefault) widthsInChars $$
- head'' $$
- body $$
- border '-' (repeat AlignDefault) widthsInChars
-
-itemEndsWithTightList :: [Block] -> Bool
-itemEndsWithTightList bs =
- case bs of
- [Plain _, BulletList xs] -> isTightList xs
- [Plain _, OrderedList _ xs] -> isTightList xs
- _ -> False
-
--- | Convert bullet list item (list of blocks) to markdown.
-bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc
-bulletListItemToMarkdown opts bs = do
- contents <- blockListToMarkdown opts bs
- let sps = replicate (writerTabStop opts - 2) ' '
- let start = text ('-' : ' ' : sps)
- -- remove trailing blank line if item ends with a tight list
- let contents' = if itemEndsWithTightList bs
- then chomp contents <> cr
- else contents
- return $ hang (writerTabStop opts) start $ contents' <> cr
-
--- | Convert ordered list item (a list of blocks) to markdown.
-orderedListItemToMarkdown :: PandocMonad m
- => WriterOptions -- ^ options
- -> String -- ^ list item marker
- -> [Block] -- ^ list item (list of blocks)
- -> MD m Doc
-orderedListItemToMarkdown opts marker bs = do
- contents <- blockListToMarkdown opts bs
- let sps = case length marker - writerTabStop opts of
- n | n > 0 -> text $ replicate n ' '
- _ -> text " "
- let start = text marker <> sps
- -- remove trailing blank line if item ends with a tight list
- let contents' = if itemEndsWithTightList bs
- then chomp contents <> cr
- else contents
- return $ hang (writerTabStop opts) start $ contents' <> cr
-
--- | Convert definition list item (label, list of blocks) to markdown.
-definitionListItemToMarkdown :: PandocMonad m
- => WriterOptions
- -> ([Inline],[[Block]])
- -> MD m Doc
-definitionListItemToMarkdown opts (label, defs) = do
- labelText <- inlineListToMarkdown opts label
- defs' <- mapM (mapM (blockToMarkdown opts)) defs
- if isEnabled Ext_definition_lists opts
- then do
- let tabStop = writerTabStop opts
- isPlain <- asks envPlain
- let leader = if isPlain then " " else ": "
- let sps = case writerTabStop opts - 3 of
- n | n > 0 -> text $ replicate n ' '
- _ -> text " "
- if isEnabled Ext_compact_definition_lists opts
- then do
- let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
- $ vcat d <> cr) defs'
- return $ nowrap labelText <> cr <> contents <> cr
- else do
- let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
- $ vcat d <> cr) defs'
- let isTight = case defs of
- ((Plain _ : _): _) -> True
- _ -> False
- return $ blankline <> nowrap labelText <>
- (if isTight then cr else blankline) <> contents <> blankline
- else do
- return $ nowrap labelText <> text " " <> cr <>
- vsep (map vsep defs') <> blankline
-
--- | Convert list of Pandoc block elements to markdown.
-blockListToMarkdown :: PandocMonad m
- => WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> MD m Doc
-blockListToMarkdown opts blocks =
- mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
- -- insert comment between list and indented code block, or the
- -- code block will be treated as a list continuation paragraph
- where fixBlocks (b : CodeBlock attr x : rest)
- | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
- && isListBlock b = b : commentSep : CodeBlock attr x :
- fixBlocks rest
- fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
- b1 : commentSep : fixBlocks (b2:bs)
- fixBlocks (x : xs) = x : fixBlocks xs
- fixBlocks [] = []
- isListBlock (BulletList _) = True
- isListBlock (OrderedList _ _) = True
- isListBlock (DefinitionList _) = True
- isListBlock _ = False
- commentSep = if isEnabled Ext_raw_html opts
- then RawBlock "html" "<!-- -->\n"
- else RawBlock "markdown" "&nbsp;"
-
--- | Get reference for target; if none exists, create unique one and return.
--- Prefer label if possible; otherwise, generate a unique key.
-getReference :: PandocMonad m => Attr -> [Inline] -> Target -> MD m [Inline]
-getReference attr label target = do
- st <- get
- case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of
- Just (ref, _, _) -> return ref
- Nothing -> do
- label' <- case find (\(l,_,_) -> l == label) (stRefs st) of
- Just _ -> -- label is used; generate numerical label
- case find (\n -> notElem [Str (show n)]
- (map (\(l,_,_) -> l) (stRefs st)))
- [1..(10000 :: Integer)] of
- Just x -> return [Str (show x)]
- Nothing -> throwError $ PandocSomeError "no unique label"
- Nothing -> return label
- modify (\s -> s{ stRefs = (label', target, attr) : stRefs st })
- return label'
-
--- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc
-inlineListToMarkdown opts lst = do
- inlist <- asks envInList
- go (if inlist then avoidBadWrapsInList lst else lst)
- where go [] = return empty
- go (i:is) = case i of
- (Link _ _ _) -> case is of
- -- If a link is followed by another link or '[' we don't shortcut
- (Link _ _ _):_ -> unshortcutable
- Space:(Link _ _ _):_ -> unshortcutable
- Space:(Str('[':_)):_ -> unshortcutable
- Space:(RawInline _ ('[':_)):_ -> unshortcutable
- Space:(Cite _ _):_ -> unshortcutable
- SoftBreak:(Link _ _ _):_ -> unshortcutable
- SoftBreak:(Str('[':_)):_ -> unshortcutable
- SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable
- SoftBreak:(Cite _ _):_ -> unshortcutable
- (Cite _ _):_ -> unshortcutable
- Str ('[':_):_ -> unshortcutable
- (RawInline _ ('[':_)):_ -> unshortcutable
- (RawInline _ (' ':'[':_)):_ -> unshortcutable
- _ -> shortcutable
- _ -> shortcutable
- where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
- unshortcutable = do
- iMark <- local
- (\env -> env { envRefShortcutable = False })
- (inlineToMarkdown opts i)
- fmap (iMark <>) (go is)
-
-isSp :: Inline -> Bool
-isSp Space = True
-isSp SoftBreak = True
-isSp _ = False
-
-avoidBadWrapsInList :: [Inline] -> [Inline]
-avoidBadWrapsInList [] = []
-avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s =
- Str (' ':'>':cs) : avoidBadWrapsInList xs
-avoidBadWrapsInList (s:Str [c]:[])
- | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : []
-avoidBadWrapsInList (s:Str [c]:Space:xs)
- | isSp s && c `elem` ['-','*','+'] =
- Str [' ', c] : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList (s:Str cs:Space:xs)
- | isSp s && isOrderedListMarker cs =
- Str (' ':cs) : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList (s:Str cs:[])
- | isSp s && isOrderedListMarker cs = Str (' ':cs) : []
-avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
-
-isOrderedListMarker :: String -> Bool
-isOrderedListMarker xs = (last xs `elem` ['.',')']) &&
- isRight (runParser (anyOrderedListMarker >> eof)
- defaultParserState "" xs)
-
-isRight :: Either a b -> Bool
-isRight (Right _) = True
-isRight (Left _) = False
-
--- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc
-inlineToMarkdown opts (Span attrs ils) = do
- plain <- asks envPlain
- contents <- inlineListToMarkdown opts ils
- return $ case plain of
- True -> contents
- False | isEnabled Ext_bracketed_spans opts ->
- "[" <> contents <> "]" <>
- if attrs == nullAttr
- then "{}"
- else linkAttributes opts attrs
- | isEnabled Ext_raw_html opts ||
- isEnabled Ext_native_spans opts ->
- tagWithAttrs "span" attrs <> contents <> text "</span>"
- | otherwise -> contents
-inlineToMarkdown opts (Emph lst) = do
- plain <- asks envPlain
- contents <- inlineListToMarkdown opts lst
- return $ if plain
- then "_" <> contents <> "_"
- else "*" <> contents <> "*"
-inlineToMarkdown opts (Strong lst) = do
- plain <- asks envPlain
- if plain
- then inlineListToMarkdown opts $ capitalize lst
- else do
- contents <- inlineListToMarkdown opts lst
- return $ "**" <> contents <> "**"
-inlineToMarkdown opts (Strikeout lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_strikeout opts
- then "~~" <> contents <> "~~"
- else if isEnabled Ext_raw_html opts
- then "<s>" <> contents <> "</s>"
- else contents
-inlineToMarkdown opts (Superscript lst) =
- local (\env -> env {envEscapeSpaces = True}) $ do
- contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_superscript opts
- then "^" <> contents <> "^"
- else if isEnabled Ext_raw_html opts
- then "<sup>" <> contents <> "</sup>"
- else case (render Nothing contents) of
- ds | all (\d -> d >= '0' && d <= '9') ds
- -> text (map toSuperscript ds)
- _ -> contents
- where toSuperscript '1' = '\x00B9'
- toSuperscript '2' = '\x00B2'
- toSuperscript '3' = '\x00B3'
- toSuperscript c = chr (0x2070 + (ord c - 48))
-inlineToMarkdown opts (Subscript lst) =
- local (\env -> env {envEscapeSpaces = True}) $ do
- contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_subscript opts
- then "~" <> contents <> "~"
- else if isEnabled Ext_raw_html opts
- then "<sub>" <> contents <> "</sub>"
- else case (render Nothing contents) of
- ds | all (\d -> d >= '0' && d <= '9') ds
- -> text (map toSubscript ds)
- _ -> contents
- where toSubscript c = chr (0x2080 + (ord c - 48))
-inlineToMarkdown opts (SmallCaps lst) = do
- plain <- asks envPlain
- if not plain &&
- (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
- then do
- contents <- inlineListToMarkdown opts lst
- return $ tagWithAttrs "span"
- ("",[],[("style","font-variant:small-caps;")])
- <> contents <> text "</span>"
- else inlineListToMarkdown opts $ capitalize lst
-inlineToMarkdown opts (Quoted SingleQuote lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_smart opts
- then "'" <> contents <> "'"
- else "‘" <> contents <> "’"
-inlineToMarkdown opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_smart opts
- then "\"" <> contents <> "\""
- else "“" <> contents <> "”"
-inlineToMarkdown opts (Code attr str) = do
- let tickGroups = filter (\s -> '`' `elem` s) $ group str
- let longest = if null tickGroups
- then 0
- else maximum $ map length tickGroups
- let marker = replicate (longest + 1) '`'
- let spacer = if (longest == 0) then "" else " "
- let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
- then attrsToMarkdown attr
- else empty
- plain <- asks envPlain
- if plain
- then return $ text str
- else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
-inlineToMarkdown opts (Str str) = do
- isPlain <- asks envPlain
- let str' = (if isEnabled Ext_smart opts
- then unsmartify opts
- else id) $
- if isPlain
- then str
- else escapeString opts str
- return $ text str'
-inlineToMarkdown opts (Math InlineMath str) =
- case writerHTMLMathMethod opts of
- WebTeX url ->
- inlineToMarkdown opts (Image nullAttr [Str str]
- (url ++ urlEncode str, str))
- _ | isEnabled Ext_tex_math_dollars opts ->
- return $ "$" <> text str <> "$"
- | isEnabled Ext_tex_math_single_backslash opts ->
- return $ "\\(" <> text str <> "\\)"
- | isEnabled Ext_tex_math_double_backslash opts ->
- return $ "\\\\(" <> text str <> "\\\\)"
- | otherwise -> do
- plain <- asks envPlain
- texMathToInlines InlineMath str >>=
- inlineListToMarkdown opts .
- (if plain then makeMathPlainer else id)
-inlineToMarkdown opts (Math DisplayMath str) =
- case writerHTMLMathMethod opts of
- WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
- inlineToMarkdown opts (Image nullAttr [Str str]
- (url ++ urlEncode str, str))
- _ | isEnabled Ext_tex_math_dollars opts ->
- return $ "$$" <> text str <> "$$"
- | isEnabled Ext_tex_math_single_backslash opts ->
- return $ "\\[" <> text str <> "\\]"
- | isEnabled Ext_tex_math_double_backslash opts ->
- return $ "\\\\[" <> text str <> "\\\\]"
- | otherwise -> (\x -> cr <> x <> cr) `fmap`
- (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
-inlineToMarkdown opts il@(RawInline f str) = do
- plain <- asks envPlain
- if not plain &&
- ( f == "markdown" ||
- (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) ||
- (isEnabled Ext_raw_html opts && f == "html") )
- then return $ text str
- else do
- report $ InlineNotRendered il
- return empty
-inlineToMarkdown opts (LineBreak) = do
- plain <- asks envPlain
- if plain || isEnabled Ext_hard_line_breaks opts
- then return cr
- else return $
- if isEnabled Ext_escaped_line_breaks opts
- then "\\" <> cr
- else " " <> cr
-inlineToMarkdown _ Space = do
- escapeSpaces <- asks envEscapeSpaces
- return $ if escapeSpaces then "\\ " else space
-inlineToMarkdown opts SoftBreak = do
- escapeSpaces <- asks envEscapeSpaces
- let space' = if escapeSpaces then "\\ " else space
- return $ case writerWrapText opts of
- WrapNone -> space'
- WrapAuto -> space'
- WrapPreserve -> cr
-inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
-inlineToMarkdown opts (Cite (c:cs) lst)
- | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
- | otherwise =
- if citationMode c == AuthorInText
- then do
- suffs <- inlineListToMarkdown opts $ citationSuffix c
- rest <- mapM convertOne cs
- let inbr = suffs <+> joincits rest
- br = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
- return $ text ("@" ++ citationId c) <+> br
- else do
- cits <- mapM convertOne (c:cs)
- return $ text "[" <> joincits cits <> text "]"
- where
- joincits = hcat . intersperse (text "; ") . filter (not . isEmpty)
- convertOne Citation { citationId = k
- , citationPrefix = pinlines
- , citationSuffix = sinlines
- , citationMode = m }
- = do
- pdoc <- inlineListToMarkdown opts pinlines
- sdoc <- inlineListToMarkdown opts sinlines
- let k' = text (modekey m ++ "@" ++ k)
- r = case sinlines of
- Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
- _ -> k' <+> sdoc
- return $ pdoc <+> r
- modekey SuppressAuthor = "-"
- modekey _ = ""
-inlineToMarkdown opts lnk@(Link attr txt (src, tit))
- | isEnabled Ext_raw_html opts &&
- not (isEnabled Ext_link_attributes opts) &&
- attr /= nullAttr = -- use raw HTML
- (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
- | otherwise = do
- plain <- asks envPlain
- linktext <- inlineListToMarkdown opts txt
- let linktitle = if null tit
- then empty
- else text $ " \"" ++ tit ++ "\""
- let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
- let useAuto = isURI src &&
- case txt of
- [Str s] | escapeURI s == srcSuffix -> True
- _ -> False
- let useRefLinks = writerReferenceLinks opts && not useAuto
- shortcutable <- asks envRefShortcutable
- let useShortcutRefLinks = shortcutable &&
- isEnabled Ext_shortcut_reference_links opts
- ref <- if useRefLinks then getReference attr txt (src, tit) else return []
- reftext <- inlineListToMarkdown opts ref
- return $ if useAuto
- then if plain
- then text srcSuffix
- else "<" <> text srcSuffix <> ">"
- else if useRefLinks
- then let first = "[" <> linktext <> "]"
- second = if txt == ref
- then if useShortcutRefLinks
- then ""
- else "[]"
- else "[" <> reftext <> "]"
- in first <> second
- else if plain
- then linktext
- else "[" <> linktext <> "](" <>
- text src <> linktitle <> ")" <>
- linkAttributes opts attr
-inlineToMarkdown opts img@(Image attr alternate (source, tit))
- | isEnabled Ext_raw_html opts &&
- not (isEnabled Ext_link_attributes opts) &&
- attr /= nullAttr = -- use raw HTML
- (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]])
- | otherwise = do
- plain <- asks envPlain
- let txt = if null alternate || alternate == [Str source]
- -- to prevent autolinks
- then [Str ""]
- else alternate
- linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
- return $ if plain
- then "[" <> linkPart <> "]"
- else "!" <> linkPart
-inlineToMarkdown opts (Note contents) = do
- modify (\st -> st{ stNotes = contents : stNotes st })
- st <- get
- let ref = text $ writerIdentifierPrefix opts ++ show (stNoteNum st + (length $ stNotes st) - 1)
- if isEnabled Ext_footnotes opts
- then return $ "[^" <> ref <> "]"
- else return $ "[" <> ref <> "]"
-
-makeMathPlainer :: [Inline] -> [Inline]
-makeMathPlainer = walk go
- where
- go (Emph xs) = Span nullAttr xs
- go x = x
-
diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs
deleted file mode 100644
index b7419ddf9..000000000
--- a/src/Text/Pandoc/Writers/Math.hs
+++ /dev/null
@@ -1,49 +0,0 @@
-module Text.Pandoc.Writers.Math
- ( texMathToInlines
- , convertMath
- )
-where
-
-import Text.Pandoc.Class
-import Text.Pandoc.Definition
-import Text.Pandoc.Logging
-import Text.TeXMath (Exp, writePandoc, DisplayType(..), readTeX)
-
--- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ or @$$@ characters if entire formula
--- can't be converted.
-texMathToInlines :: PandocMonad m
- => MathType
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> m [Inline]
-texMathToInlines mt inp = do
- res <- convertMath writePandoc mt inp
- case res of
- Right (Just ils) -> return ils
- Right (Nothing) -> do
- report $ CouldNotConvertTeXMath inp ""
- return [mkFallback mt inp]
- Left il -> return [il]
-
-mkFallback :: MathType -> String -> Inline
-mkFallback mt str = Str (delim ++ str ++ delim)
- where delim = case mt of
- DisplayMath -> "$$"
- InlineMath -> "$"
-
--- | Converts a raw TeX math formula using a writer function,
--- issuing a warning and producing a fallback (a raw string)
--- on failure.
-convertMath :: PandocMonad m
- => (DisplayType -> [Exp] -> a) -> MathType -> String
- -> m (Either Inline a)
-convertMath writer mt str = do
- case writer dt <$> readTeX str of
- Right r -> return (Right r)
- Left e -> do
- report $ CouldNotConvertTeXMath str e
- return (Left $ mkFallback mt str)
- where dt = case mt of
- DisplayMath -> DisplayBlock
- InlineMath -> DisplayInline
-
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
deleted file mode 100644
index dc6206e6c..000000000
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ /dev/null
@@ -1,442 +0,0 @@
-{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.MediaWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to MediaWiki markup.
-
-MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
--}
-module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Pretty (render)
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intersect, intercalate )
-import Network.URI ( isURI )
-import Control.Monad.Reader
-import Control.Monad.State
-import Text.Pandoc.Class (PandocMonad)
-
-data WriterState = WriterState {
- stNotes :: Bool -- True if there are notes
- , stOptions :: WriterOptions -- writer options
- }
-
-data WriterReader = WriterReader {
- options :: WriterOptions -- Writer options
- , listLevel :: String -- String at beginning of list items, e.g. "**"
- , useTags :: Bool -- True if we should use HTML tags because we're in a complex list
- }
-
-type MediaWikiWriter = ReaderT WriterReader (State WriterState)
-
--- | Convert Pandoc to MediaWiki.
-writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeMediaWiki opts document = return $
- let initialState = WriterState { stNotes = False, stOptions = opts }
- env = WriterReader { options = opts, listLevel = [], useTags = False }
- in evalState (runReaderT (pandocToMediaWiki document) env) initialState
-
--- | Return MediaWiki representation of document.
-pandocToMediaWiki :: Pandoc -> MediaWikiWriter String
-pandocToMediaWiki (Pandoc meta blocks) = do
- opts <- asks options
- metadata <- metaToJSON opts
- (fmap trimr . blockListToMediaWiki)
- inlineListToMediaWiki
- meta
- body <- blockListToMediaWiki blocks
- notesExist <- gets stNotes
- let notes = if notesExist
- then "\n<references />"
- else ""
- let main = body ++ notes
- let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts) metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
--- | Escape special characters for MediaWiki.
-escapeString :: String -> String
-escapeString = escapeStringForXML
-
--- | Convert Pandoc block element to MediaWiki.
-blockToMediaWiki :: Block -- ^ Block element
- -> MediaWikiWriter String
-
-blockToMediaWiki Null = return ""
-
-blockToMediaWiki (Div attrs bs) = do
- contents <- blockListToMediaWiki bs
- return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++
- contents ++ "\n\n" ++ "</div>"
-
-blockToMediaWiki (Plain inlines) =
- inlineListToMediaWiki inlines
-
--- title beginning with fig: indicates that the image is a figure
-blockToMediaWiki (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- if null txt
- then return ""
- else ("|caption " ++) `fmap` inlineListToMediaWiki txt
- img <- imageToMediaWiki attr
- let opt = if null txt
- then ""
- else "|alt=" ++ if null tit then capt else tit ++ capt
- return $ "[[File:" ++ src ++ "|frame|none" ++ img ++ opt ++ "]]\n"
-
-blockToMediaWiki (Para inlines) = do
- tags <- asks useTags
- lev <- asks listLevel
- contents <- inlineListToMediaWiki inlines
- return $ if tags
- then "<p>" ++ contents ++ "</p>"
- else contents ++ if null lev then "\n" else ""
-
-blockToMediaWiki (LineBlock lns) =
- blockToMediaWiki $ linesToPara lns
-
-blockToMediaWiki (RawBlock f str)
- | f == Format "mediawiki" = return str
- | f == Format "html" = return str
- | otherwise = return ""
-
-blockToMediaWiki HorizontalRule = return "\n-----\n"
-
-blockToMediaWiki (Header level _ inlines) = do
- contents <- inlineListToMediaWiki inlines
- let eqs = replicate level '='
- return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
-
-blockToMediaWiki (CodeBlock (_,classes,_) str) = do
- let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
- "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
- "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
- "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
- "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
- "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
- "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
- "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
- "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
- return $
- if null at
- then "<pre" ++ (if null classes
- then ">"
- else " class=\"" ++ unwords classes ++ "\">") ++
- escapeString str ++ "</pre>"
- else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>"
- -- note: no escape!
-
-blockToMediaWiki (BlockQuote blocks) = do
- contents <- blockListToMediaWiki blocks
- return $ "<blockquote>" ++ contents ++ "</blockquote>"
-
-blockToMediaWiki (Table capt aligns widths headers rows') = do
- caption <- if null capt
- then return ""
- else do
- c <- inlineListToMediaWiki capt
- return $ "|+ " ++ trimr c ++ "\n"
- let headless = all null headers
- let allrows = if headless then rows' else headers:rows'
- tableBody <- intercalate "|-\n" `fmap`
- mapM (tableRowToMediaWiki headless aligns widths)
- (zip [1..] allrows)
- return $ "{|\n" ++ caption ++ tableBody ++ "|}\n"
-
-blockToMediaWiki x@(BulletList items) = do
- tags <- fmap (|| not (isSimpleList x)) $ asks useTags
- if tags
- then do
- contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items
- return $ "<ul>\n" ++ vcat contents ++ "</ul>\n"
- else do
- lev <- asks listLevel
- contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items
- return $ vcat contents ++ if null lev then "\n" else ""
-
-blockToMediaWiki x@(OrderedList attribs items) = do
- tags <- fmap (|| not (isSimpleList x)) $ asks useTags
- if tags
- then do
- contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items
- return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n"
- else do
- lev <- asks listLevel
- contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items
- return $ vcat contents ++ if null lev then "\n" else ""
-
-blockToMediaWiki x@(DefinitionList items) = do
- tags <- fmap (|| not (isSimpleList x)) $ asks useTags
- if tags
- then do
- contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items
- return $ "<dl>\n" ++ vcat contents ++ "</dl>\n"
- else do
- lev <- asks listLevel
- contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items
- return $ vcat contents ++ if null lev then "\n" else ""
-
--- Auxiliary functions for lists:
-
--- | Convert ordered list attributes to HTML attribute string
-listAttribsToString :: ListAttributes -> String
-listAttribsToString (startnum, numstyle, _) =
- let numstyle' = camelCaseToHyphenated $ show numstyle
- in (if startnum /= 1
- then " start=\"" ++ show startnum ++ "\""
- else "") ++
- (if numstyle /= DefaultStyle
- then " style=\"list-style-type: " ++ numstyle' ++ ";\""
- else "")
-
--- | Convert bullet or ordered list item (list of blocks) to MediaWiki.
-listItemToMediaWiki :: [Block] -> MediaWikiWriter String
-listItemToMediaWiki items = do
- contents <- blockListToMediaWiki items
- tags <- asks useTags
- if tags
- then return $ "<li>" ++ contents ++ "</li>"
- else do
- marker <- asks listLevel
- return $ marker ++ " " ++ contents
-
--- | Convert definition list item (label, list of blocks) to MediaWiki.
-definitionListItemToMediaWiki :: ([Inline],[[Block]])
- -> MediaWikiWriter String
-definitionListItemToMediaWiki (label, items) = do
- labelText <- inlineListToMediaWiki label
- contents <- mapM blockListToMediaWiki items
- tags <- asks useTags
- if tags
- then return $ "<dt>" ++ labelText ++ "</dt>\n" ++
- intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents)
- else do
- marker <- asks listLevel
- return $ marker ++ " " ++ labelText ++ "\n" ++
- intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents)
-
--- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
-isSimpleList :: Block -> Bool
-isSimpleList x =
- case x of
- BulletList items -> all isSimpleListItem items
- OrderedList (num, sty, _) items -> all isSimpleListItem items &&
- num == 1 && sty `elem` [DefaultStyle, Decimal]
- DefinitionList items -> all isSimpleListItem $ concatMap snd items
- _ -> False
-
--- | True if list item can be handled with the simple wiki syntax. False if
--- HTML tags will be needed.
-isSimpleListItem :: [Block] -> Bool
-isSimpleListItem [] = True
-isSimpleListItem [x] =
- case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- DefinitionList _ -> isSimpleList x
- _ -> False
-isSimpleListItem [x, y] | isPlainOrPara x =
- case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- DefinitionList _ -> isSimpleList y
- _ -> False
-isSimpleListItem _ = False
-
-isPlainOrPara :: Block -> Bool
-isPlainOrPara (Plain _) = True
-isPlainOrPara (Para _) = True
-isPlainOrPara _ = False
-
--- | Concatenates strings with line breaks between them.
-vcat :: [String] -> String
-vcat = intercalate "\n"
-
--- Auxiliary functions for tables:
-
-tableRowToMediaWiki :: Bool
- -> [Alignment]
- -> [Double]
- -> (Int, [[Block]])
- -> MediaWikiWriter String
-tableRowToMediaWiki headless alignments widths (rownum, cells) = do
- cells' <- mapM (tableCellToMediaWiki headless rownum)
- $ zip3 alignments widths cells
- return $ unlines cells'
-
-tableCellToMediaWiki :: Bool
- -> Int
- -> (Alignment, Double, [Block])
- -> MediaWikiWriter String
-tableCellToMediaWiki headless rownum (alignment, width, bs) = do
- contents <- blockListToMediaWiki bs
- let marker = if rownum == 1 && not headless then "!" else "|"
- let percent w = show (truncate (100*w) :: Integer) ++ "%"
- let attrs = ["align=" ++ show (alignmentToString alignment) |
- alignment /= AlignDefault && alignment /= AlignLeft] ++
- ["width=\"" ++ percent width ++ "\"" |
- width /= 0.0 && rownum == 1]
- let attr = if null attrs
- then ""
- else unwords attrs ++ "|"
- let sep = case bs of
- [Plain _] -> " "
- [Para _] -> " "
- _ -> "\n"
- return $ marker ++ attr ++ sep ++ trimr contents
-
-alignmentToString :: Alignment -> String
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
-imageToMediaWiki :: Attr -> MediaWikiWriter String
-imageToMediaWiki attr = do
- opts <- gets stOptions
- let (_, cls, _) = attr
- toPx = fmap (showInPixel opts) . checkPct
- checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
- go (Just w) Nothing = '|':w ++ "px"
- go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px"
- go Nothing (Just h) = "|x" ++ h ++ "px"
- go Nothing Nothing = ""
- dims = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
- classes = if null cls
- then ""
- else "|class=" ++ unwords cls
- return $ dims ++ classes
-
--- | Convert list of Pandoc block elements to MediaWiki.
-blockListToMediaWiki :: [Block] -- ^ List of block elements
- -> MediaWikiWriter String
-blockListToMediaWiki blocks =
- fmap vcat $ mapM blockToMediaWiki blocks
-
--- | Convert list of Pandoc inline elements to MediaWiki.
-inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String
-inlineListToMediaWiki lst =
- fmap concat $ mapM inlineToMediaWiki lst
-
--- | Convert Pandoc inline element to MediaWiki.
-inlineToMediaWiki :: Inline -> MediaWikiWriter String
-
-inlineToMediaWiki (Span attrs ils) = do
- contents <- inlineListToMediaWiki ils
- return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>"
-
-inlineToMediaWiki (Emph lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "''" ++ contents ++ "''"
-
-inlineToMediaWiki (Strong lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "'''" ++ contents ++ "'''"
-
-inlineToMediaWiki (Strikeout lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "<s>" ++ contents ++ "</s>"
-
-inlineToMediaWiki (Superscript lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "<sup>" ++ contents ++ "</sup>"
-
-inlineToMediaWiki (Subscript lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "<sub>" ++ contents ++ "</sub>"
-
-inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst
-
-inlineToMediaWiki (Quoted SingleQuote lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "\8216" ++ contents ++ "\8217"
-
-inlineToMediaWiki (Quoted DoubleQuote lst) = do
- contents <- inlineListToMediaWiki lst
- return $ "\8220" ++ contents ++ "\8221"
-
-inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst
-
-inlineToMediaWiki (Code _ str) =
- return $ "<code>" ++ escapeString str ++ "</code>"
-
-inlineToMediaWiki (Str str) = return $ escapeString str
-
-inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>"
- -- note: str should NOT be escaped
-
-inlineToMediaWiki (RawInline f str)
- | f == Format "mediawiki" = return str
- | f == Format "html" = return str
- | otherwise = return ""
-
-inlineToMediaWiki LineBreak = return "<br />\n"
-
-inlineToMediaWiki SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- case wrapText of
- WrapAuto -> return " "
- WrapNone -> return " "
- WrapPreserve -> return "\n"
-
-inlineToMediaWiki Space = return " "
-
-inlineToMediaWiki (Link _ txt (src, _)) = do
- label <- inlineListToMediaWiki txt
- case txt of
- [Str s] | isURI src && escapeURI s == src -> return src
- _ -> return $ if isURI src
- then "[" ++ src ++ " " ++ label ++ "]"
- else "[[" ++ src' ++ "|" ++ label ++ "]]"
- where src' = case src of
- '/':xs -> xs -- with leading / it's a
- _ -> src -- link to a help page
-
-inlineToMediaWiki (Image attr alt (source, tit)) = do
- img <- imageToMediaWiki attr
- alt' <- inlineListToMediaWiki alt
- let txt = if null tit
- then if null alt
- then ""
- else '|' : alt'
- else '|' : tit
- return $ "[[File:" ++ source ++ img ++ txt ++ "]]"
-
-inlineToMediaWiki (Note contents) = do
- contents' <- blockListToMediaWiki contents
- modify (\s -> s { stNotes = True })
- return $ "<ref>" ++ contents' ++ "</ref>"
- -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
deleted file mode 100644
index 2421fd94d..000000000
--- a/src/Text/Pandoc/Writers/Native.hs
+++ /dev/null
@@ -1,79 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Native
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of a 'Pandoc' document to a string representation.
--}
-module Text.Pandoc.Writers.Native ( writeNative )
-where
-import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
-import Data.List ( intersperse )
-import Text.Pandoc.Definition
-import Text.Pandoc.Pretty
-import Text.Pandoc.Class (PandocMonad)
-
-prettyList :: [Doc] -> Doc
-prettyList ds =
- "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]"
-
--- | Prettyprint Pandoc block element.
-prettyBlock :: Block -> Doc
-prettyBlock (LineBlock lines') =
- "LineBlock" $$ prettyList (map (text . show) lines')
-prettyBlock (BlockQuote blocks) =
- "BlockQuote" $$ prettyList (map prettyBlock blocks)
-prettyBlock (OrderedList attribs blockLists) =
- "OrderedList" <> space <> text (show attribs) $$
- (prettyList $ map (prettyList . map prettyBlock) blockLists)
-prettyBlock (BulletList blockLists) =
- "BulletList" $$
- (prettyList $ map (prettyList . map prettyBlock) blockLists)
-prettyBlock (DefinitionList items) = "DefinitionList" $$
- (prettyList $ map deflistitem items)
- where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <>
- nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")"
-prettyBlock (Table caption aligns widths header rows) =
- "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <>
- text (show widths) $$
- prettyRow header $$
- prettyList (map prettyRow rows)
- where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols)
-prettyBlock (Div attr blocks) =
- text ("Div " <> show attr) $$ prettyList (map prettyBlock blocks)
-prettyBlock block = text $ show block
-
--- | Prettyprint Pandoc document.
-writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeNative opts (Pandoc meta blocks) = return $
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- withHead = case writerTemplate opts of
- Just _ -> \bs -> text ("Pandoc (" ++ show meta ++ ")") $$
- bs $$ cr
- Nothing -> id
- in render colwidth $ withHead $ prettyList $ map prettyBlock blocks
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
deleted file mode 100644
index ee5fa4c24..000000000
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ /dev/null
@@ -1,210 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables #-}
-{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.ODT
- Copyright : Copyright (C) 2008-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to ODT.
--}
-module Text.Pandoc.Writers.ODT ( writeODT ) where
-import Data.List ( isPrefixOf )
-import Data.Maybe ( fromMaybe )
-import Text.XML.Light.Output
-import Text.TeXMath
-import qualified Data.ByteString.Lazy as B
-import Text.Pandoc.UTF8 ( fromStringLazy )
-import Codec.Archive.Zip
-import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
-import Text.Pandoc.Shared ( stringify )
-import Text.Pandoc.ImageSize
-import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.Writers.Shared ( fixDisplayMath )
-import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
-import Control.Monad.State
-import Control.Monad.Except (runExceptT)
-import Text.Pandoc.Error (PandocError)
-import Text.Pandoc.XML
-import Text.Pandoc.Pretty
-import System.FilePath ( takeExtension, takeDirectory, (<.>))
-import Text.Pandoc.Class ( PandocMonad, report )
-import qualified Text.Pandoc.Class as P
-import Text.Pandoc.Logging
-
-data ODTState = ODTState { stEntries :: [Entry]
- }
-
-type O m = StateT ODTState m
-
--- | Produce an ODT file from a Pandoc document.
-writeODT :: PandocMonad m
- => WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> m B.ByteString
-writeODT opts doc =
- let initState = ODTState{ stEntries = []
- }
- in
- evalStateT (pandocToODT opts doc) initState
-
--- | Produce an ODT file from a Pandoc document.
-pandocToODT :: PandocMonad m
- => WriterOptions -- ^ Writer options
- -> Pandoc -- ^ Document to convert
- -> O m B.ByteString
-pandocToODT opts doc@(Pandoc meta _) = do
- let datadir = writerUserDataDir opts
- let title = docTitle meta
- refArchive <-
- case writerReferenceDoc opts of
- Just f -> liftM toArchive $ lift $ P.readFileLazy f
- Nothing -> lift $ (toArchive . B.fromStrict) <$>
- P.readDataFile datadir "reference.odt"
- -- handle formulas and pictures
- -- picEntriesRef <- P.newIORef ([] :: [Entry])
- doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc
- newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc'
- epochtime <- floor `fmap` (lift P.getPOSIXTime)
- let contentEntry = toEntry "content.xml" epochtime
- $ fromStringLazy newContents
- picEntries <- gets stEntries
- let archive = foldr addEntryToArchive refArchive
- $ contentEntry : picEntries
- -- construct META-INF/manifest.xml based on archive
- let toFileEntry fp = case getMimeType fp of
- Nothing -> empty
- Just m -> selfClosingTag "manifest:file-entry"
- [("manifest:media-type", m)
- ,("manifest:full-path", fp)
- ,("manifest:version", "1.2")
- ]
- let files = [ ent | ent <- filesInArchive archive,
- not ("META-INF" `isPrefixOf` ent) ]
- let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive,
- "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ]
- let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
- $ fromStringLazy $ render Nothing
- $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
- $$
- ( inTags True "manifest:manifest"
- [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")
- ,("manifest:version","1.2")]
- $ ( selfClosingTag "manifest:file-entry"
- [("manifest:media-type","application/vnd.oasis.opendocument.text")
- ,("manifest:full-path","/")]
- $$ vcat ( map toFileEntry $ files )
- $$ vcat ( map toFileEntry $ formulas )
- )
- )
- let archive' = addEntryToArchive manifestEntry archive
- let metaEntry = toEntry "meta.xml" epochtime
- $ fromStringLazy $ render Nothing
- $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
- $$
- ( inTags True "office:document-meta"
- [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0")
- ,("xmlns:xlink","http://www.w3.org/1999/xlink")
- ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
- ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0")
- ,("xmlns:ooo","http://openoffice.org/2004/office")
- ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
- ,("office:version","1.2")]
- $ ( inTagsSimple "office:meta"
- $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title))
- )
- )
- )
- -- make sure mimetype is first
- let mimetypeEntry = toEntry "mimetype" epochtime
- $ fromStringLazy "application/vnd.oasis.opendocument.text"
- let archive'' = addEntryToArchive mimetypeEntry
- $ addEntryToArchive metaEntry archive'
- return $ fromArchive archive''
-
--- | transform both Image and Math elements
-transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline
-transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = do
- res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
- case res of
- Left (_ :: PandocError) -> do
- report $ CouldNotFetchResource src ""
- return $ Emph lab
- Right (img, mbMimeType) -> do
- (ptX, ptY) <- case imageSize img of
- Right s -> return $ sizeInPoints s
- Left msg -> do
- report $ CouldNotDetermineImageSize src msg
- return (100, 100)
- let dims =
- case (getDim Width, getDim Height) of
- (Just w, Just h) -> [("width", show w), ("height", show h)]
- (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")]
- (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)]
- (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")]
- (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)]
- _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")]
- where
- ratio = ptX / ptY
- getDim dir = case (dimension dir attr) of
- Just (Percent i) -> Just $ Percent i
- Just dim -> Just $ Inch $ inInch opts dim
- Nothing -> Nothing
- let newattr = (id', cls, dims)
- entries <- gets stEntries
- let extension = fromMaybe (takeExtension $ takeWhile (/='?') src)
- (mbMimeType >>= extensionFromMimeType)
- let newsrc = "Pictures/" ++ show (length entries) <.> extension
- let toLazy = B.fromChunks . (:[])
- epochtime <- floor `fmap` (lift P.getPOSIXTime)
- let entry = toEntry newsrc epochtime $ toLazy img
- modify $ \st -> st{ stEntries = entry : entries }
- return $ Image newattr lab (newsrc, t)
-transformPicMath _ (Math t math) = do
- entries <- gets stEntries
- let dt = if t == InlineMath then DisplayInline else DisplayBlock
- case writeMathML dt <$> readTeX math of
- Left _ -> return $ Math t math
- Right r -> do
- let conf = useShortEmptyTags (const False) defaultConfigPP
- let mathml = ppcTopElement conf r
- epochtime <- floor `fmap` (lift $ P.getPOSIXTime)
- let dirname = "Formula-" ++ show (length entries) ++ "/"
- let fname = dirname ++ "content.xml"
- let entry = toEntry fname epochtime (fromStringLazy mathml)
- modify $ \st -> st{ stEntries = entry : entries }
- return $ RawInline (Format "opendocument") $ render Nothing $
- inTags False "draw:frame" [("text:anchor-type",
- if t == DisplayMath
- then "paragraph"
- else "as-char")
- ,("style:vertical-pos", "middle")
- ,("style:vertical-rel", "text")] $
- selfClosingTag "draw:object" [("xlink:href", dirname)
- , ("xlink:type", "simple")
- , ("xlink:show", "embed")
- , ("xlink:actuate", "onLoad")]
-
-transformPicMath _ x = return x
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
deleted file mode 100644
index bc0cfc300..000000000
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ /dev/null
@@ -1,103 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-
-Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.OPML
- Copyright : Copyright (C) 2013-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to OPML XML.
--}
-module Text.Pandoc.Writers.OPML ( writeOPML) where
-import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Writers.Markdown (writeMarkdown)
-import Text.Pandoc.Pretty
-import Text.Pandoc.Compat.Time
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Error
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Class (PandocMonad)
-
--- | Convert Pandoc document to string in OPML format.
-writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeOPML opts (Pandoc meta blocks) = do
- let elements = hierarchicalize blocks
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta
- metadata <- metaToJSON opts
- (writeMarkdown def . Pandoc nullMeta)
- (\ils -> trimr <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils]))
- meta'
- main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements)
- let context = defField "body" main metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
-
-writeHtmlInlines :: PandocMonad m => [Inline] -> m String
-writeHtmlInlines ils =
- trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils])
-
--- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT
-showDateTimeRFC822 :: UTCTime -> String
-showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z"
-
-convertDate :: [Inline] -> String
-convertDate ils = maybe "" showDateTimeRFC822 $
-#if MIN_VERSION_time(1,5,0)
- parseTimeM True
-#else
- parseTime
-#endif
- defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils)
-
--- | Convert an Element to OPML.
-elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc
-elementToOPML _ (Blk _) = return empty
-elementToOPML opts (Sec _ _num _ title elements) = do
- let isBlk :: Element -> Bool
- isBlk (Blk _) = True
- isBlk _ = False
-
- fromBlk :: PandocMonad m => Element -> m Block
- fromBlk (Blk x) = return x
- fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block"
-
- (blocks, rest) = span isBlk elements
- htmlIls <- writeHtmlInlines title
- md <- if null blocks
- then return []
- else do blks <- mapM fromBlk blocks
- writeMarkdown def $ Pandoc nullMeta blks
- let attrs = [("text", htmlIls)] ++ [("_note", md) | not (null blocks)]
- o <- mapM (elementToOPML opts) rest
- return $ inTags True "outline" attrs $ vcat o
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
deleted file mode 100644
index 851e18b8e..000000000
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ /dev/null
@@ -1,626 +0,0 @@
-{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-}
-{-
-Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it>
- and John MacFarlane.
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.OpenDocument
- Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to OpenDocument XML.
--}
-module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.XML
-import Text.Pandoc.Shared (linesToPara)
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.Math
-import Text.Pandoc.Pretty
-import Text.Printf ( printf )
-import Control.Arrow ( (***), (>>>) )
-import Control.Monad.State hiding ( when )
-import Data.Char (chr)
-import qualified Data.Set as Set
-import qualified Data.Map as Map
-import Text.Pandoc.Writers.Shared
-import Data.List (sortBy)
-import Data.Ord (comparing)
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
--- | Auxiliary function to convert Plain block to Para.
-plainToPara :: Block -> Block
-plainToPara (Plain x) = Para x
-plainToPara x = x
-
---
--- OpenDocument writer
---
-
-type OD m = StateT WriterState m
-
-data WriterState =
- WriterState { stNotes :: [Doc]
- , stTableStyles :: [Doc]
- , stParaStyles :: [Doc]
- , stListStyles :: [(Int, [Doc])]
- , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc)
- , stTextStyleAttr :: Set.Set TextStyle
- , stIndentPara :: Int
- , stInDefinition :: Bool
- , stTight :: Bool
- , stFirstPara :: Bool
- , stImageId :: Int
- }
-
-defaultWriterState :: WriterState
-defaultWriterState =
- WriterState { stNotes = []
- , stTableStyles = []
- , stParaStyles = []
- , stListStyles = []
- , stTextStyles = Map.empty
- , stTextStyleAttr = Set.empty
- , stIndentPara = 0
- , stInDefinition = False
- , stTight = False
- , stFirstPara = False
- , stImageId = 1
- }
-
-when :: Bool -> Doc -> Doc
-when p a = if p then a else empty
-
-addTableStyle :: PandocMonad m => Doc -> OD m ()
-addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s }
-
-addNote :: PandocMonad m => Doc -> OD m ()
-addNote i = modify $ \s -> s { stNotes = i : stNotes s }
-
-addParaStyle :: PandocMonad m => Doc -> OD m ()
-addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s }
-
-addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m ()
-addTextStyle attrs i = modify $ \s ->
- s { stTextStyles = Map.insert attrs i (stTextStyles s) }
-
-addTextStyleAttr :: PandocMonad m => TextStyle -> OD m ()
-addTextStyleAttr t = modify $ \s ->
- s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) }
-
-increaseIndent :: PandocMonad m => OD m ()
-increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s }
-
-resetIndent :: PandocMonad m => OD m ()
-resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 }
-
-inTightList :: PandocMonad m => OD m a -> OD m a
-inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r ->
- modify (\s -> s { stTight = False }) >> return r
-
-setInDefinitionList :: PandocMonad m => Bool -> OD m ()
-setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
-
-setFirstPara :: PandocMonad m => OD m ()
-setFirstPara = modify $ \s -> s { stFirstPara = True }
-
-inParagraphTags :: PandocMonad m => Doc -> OD m Doc
-inParagraphTags d | isEmpty d = return empty
-inParagraphTags d = do
- b <- gets stFirstPara
- a <- if b
- then do modify $ \st -> st { stFirstPara = False }
- return $ [("text:style-name", "First_20_paragraph")]
- else return [("text:style-name", "Text_20_body")]
- return $ inTags False "text:p" a d
-
-inParagraphTagsWithStyle :: String -> Doc -> Doc
-inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
-
-inSpanTags :: String -> Doc -> Doc
-inSpanTags s = inTags False "text:span" [("text:style-name",s)]
-
-withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a
-withTextStyle s f = do
- oldTextStyleAttr <- gets stTextStyleAttr
- addTextStyleAttr s
- res <- f
- modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr }
- return res
-
-inTextStyle :: PandocMonad m => Doc -> OD m Doc
-inTextStyle d = do
- at <- gets stTextStyleAttr
- if Set.null at
- then return d
- else do
- styles <- gets stTextStyles
- case Map.lookup at styles of
- Just (styleName, _) -> return $
- inTags False "text:span" [("text:style-name",styleName)] d
- Nothing -> do
- let styleName = "T" ++ show (Map.size styles + 1)
- addTextStyle at (styleName,
- inTags False "style:style"
- [("style:name", styleName)
- ,("style:family", "text")]
- $ selfClosingTag "style:text-properties"
- (concatMap textStyleAttr (Set.toList at)))
- return $ inTags False
- "text:span" [("text:style-name",styleName)] d
-
-inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc
-inHeaderTags i d =
- return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
- , ("text:outline-level", show i)] d
-
-inQuotes :: QuoteType -> Doc -> Doc
-inQuotes SingleQuote s = char '\8216' <> s <> char '\8217'
-inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221'
-
-handleSpaces :: String -> Doc
-handleSpaces s
- | ( ' ':_) <- s = genTag s
- | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x
- | otherwise = rm s
- where
- genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>)
- tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)]
- rm ( ' ':xs) = char ' ' <> genTag xs
- rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs
- rm ( x:xs) = char x <> rm xs
- rm [] = empty
-
--- | Convert Pandoc document to string in OpenDocument format.
-writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeOpenDocument opts (Pandoc meta blocks) = do
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- let render' = render colwidth
- ((body, metadata),s) <- flip runStateT
- defaultWriterState $ do
- m <- metaToJSON opts
- (fmap (render colwidth) . blocksToOpenDocument opts)
- (fmap (render colwidth) . inlinesToOpenDocument opts)
- meta
- b <- render' `fmap` blocksToOpenDocument opts blocks
- return (b, m)
- let styles = stTableStyles s ++ stParaStyles s ++
- map snd (reverse $ sortBy (comparing fst) $
- Map.elems (stTextStyles s))
- listStyle (n,l) = inTags True "text:list-style"
- [("style:name", "L" ++ show n)] (vcat l)
- let listStyles = map listStyle (stListStyles s)
- let automaticStyles = vcat $ reverse $ styles ++ listStyles
- let context = defField "body" body
- $ defField "automatic-styles" (render' automaticStyles)
- $ metadata
- return $ case writerTemplate opts of
- Nothing -> body
- Just tpl -> renderTemplate' tpl context
-
-withParagraphStyle :: PandocMonad m
- => WriterOptions -> String -> [Block] -> OD m Doc
-withParagraphStyle o s (b:bs)
- | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l
- | otherwise = go =<< blockToOpenDocument o b
- where go i = (<>) i <$> withParagraphStyle o s bs
-withParagraphStyle _ _ [] = return empty
-
-inPreformattedTags :: PandocMonad m => String -> OD m Doc
-inPreformattedTags s = do
- n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")]
- return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s
-
-orderedListToOpenDocument :: PandocMonad m
- => WriterOptions -> Int -> [[Block]] -> OD m Doc
-orderedListToOpenDocument o pn bs =
- vcat . map (inTagsIndented "text:list-item") <$>
- mapM (orderedItemToOpenDocument o pn . map plainToPara) bs
-
-orderedItemToOpenDocument :: PandocMonad m
- => WriterOptions -> Int -> [Block] -> OD m Doc
-orderedItemToOpenDocument o n (b:bs)
- | OrderedList a l <- b = newLevel a l
- | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l
- | otherwise = go =<< blockToOpenDocument o b
- where
- go i = ($$) i <$> orderedItemToOpenDocument o n bs
- newLevel a l = do
- nn <- length <$> gets stParaStyles
- ls <- head <$> gets stListStyles
- modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) }
- inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l
-orderedItemToOpenDocument _ _ [] = return empty
-
-isTightList :: [[Block]] -> Bool
-isTightList [] = False
-isTightList (b:_)
- | Plain {} : _ <- b = True
- | otherwise = False
-
-newOrderedListStyle :: PandocMonad m
- => Bool -> ListAttributes -> OD m (Int,Int)
-newOrderedListStyle b a = do
- ln <- (+) 1 . length <$> gets stListStyles
- let nbs = orderedListLevelStyle a (ln, [])
- pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln
- modify $ \s -> s { stListStyles = nbs : stListStyles s }
- return (ln,pn)
-
-bulletListToOpenDocument :: PandocMonad m
- => WriterOptions -> [[Block]] -> OD m Doc
-bulletListToOpenDocument o b = do
- ln <- (+) 1 . length <$> gets stListStyles
- (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln
- modify $ \s -> s { stListStyles = ns : stListStyles s }
- is <- listItemsToOpenDocument ("P" ++ show pn) o b
- return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is
-
-listItemsToOpenDocument :: PandocMonad m
- => String -> WriterOptions -> [[Block]] -> OD m Doc
-listItemsToOpenDocument s o is =
- vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is
-
-deflistItemToOpenDocument :: PandocMonad m
- => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc
-deflistItemToOpenDocument o (t,d) = do
- let ts = if isTightList d
- then "Definition_20_Term_20_Tight" else "Definition_20_Term"
- ds = if isTightList d
- then "Definition_20_Definition_20_Tight" else "Definition_20_Definition"
- t' <- withParagraphStyle o ts [Para t]
- d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d
- return $ t' $$ d'
-
-inBlockQuote :: PandocMonad m
- => WriterOptions -> Int -> [Block] -> OD m Doc
-inBlockQuote o i (b:bs)
- | BlockQuote l <- b = do increaseIndent
- ni <- paraStyle
- [("style:parent-style-name","Quotations")]
- go =<< inBlockQuote o ni (map plainToPara l)
- | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l
- | otherwise = do go =<< blockToOpenDocument o b
- where go block = ($$) block <$> inBlockQuote o i bs
-inBlockQuote _ _ [] = resetIndent >> return empty
-
--- | Convert a list of Pandoc blocks to OpenDocument.
-blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc
-blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
-
--- | Convert a Pandoc block element to OpenDocument.
-blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc
-blockToOpenDocument o bs
- | Plain b <- bs = if null b
- then return empty
- else inParagraphTags =<< inlinesToOpenDocument o b
- | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs
- = figure attr c s t
- | Para b <- bs = if null b
- then return empty
- else inParagraphTags =<< inlinesToOpenDocument o b
- | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b
- | Div _ xs <- bs = blocksToOpenDocument o xs
- | Header i _ b <- bs = setFirstPara >>
- (inHeaderTags i =<< inlinesToOpenDocument o b)
- | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
- | DefinitionList b <- bs = setFirstPara >> defList b
- | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b
- | OrderedList a b <- bs = setFirstPara >> orderedList a b
- | CodeBlock _ s <- bs = setFirstPara >> preformatted s
- | Table c a w h r <- bs = setFirstPara >> table c a w h r
- | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p"
- [ ("text:style-name", "Horizontal_20_Line") ])
- | RawBlock f s <- bs = if f == Format "opendocument"
- then return $ text s
- else do
- report $ BlockNotRendered bs
- return empty
- | Null <- bs = return empty
- | otherwise = return empty
- where
- defList b = do setInDefinitionList True
- r <- vcat <$> mapM (deflistItemToOpenDocument o) b
- setInDefinitionList False
- return r
- preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s)
- mkBlockQuote b = do increaseIndent
- i <- paraStyle
- [("style:parent-style-name","Quotations")]
- inBlockQuote o i (map plainToPara b)
- orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a
- inTags True "text:list" [ ("text:style-name", "L" ++ show ln)]
- <$> orderedListToOpenDocument o pn b
- table c a w h r = do
- tn <- length <$> gets stTableStyles
- pn <- length <$> gets stParaStyles
- let genIds = map chr [65..]
- name = "Table" ++ show (tn + 1)
- columnIds = zip genIds w
- mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])]
- columns = map mkColumn columnIds
- paraHStyles = paraTableStyles "Heading" pn a
- paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a
- newPara = map snd . filter (not . isEmpty . snd)
- addTableStyle $ tableStyle tn columnIds
- mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
- captionDoc <- if null c
- then return empty
- else withParagraphStyle o "Table" [Para c]
- th <- if all null h
- then return empty
- else colHeadsToOpenDocument o name (map fst paraHStyles) h
- tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r
- return $ inTags True "table:table" [ ("table:name" , name)
- , ("table:style-name", name)
- ] (vcat columns $$ th $$ vcat tr) $$ captionDoc
- figure attr caption source title | null caption =
- withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]]
- | otherwise = do
- imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]]
- captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
- return $ imageDoc $$ captionDoc
-
-colHeadsToOpenDocument :: PandocMonad m
- => WriterOptions -> String -> [String] -> [[Block]]
- -> OD m Doc
-colHeadsToOpenDocument o tn ns hs =
- inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$>
- mapM (tableItemToOpenDocument o tn) (zip ns hs)
-
-tableRowToOpenDocument :: PandocMonad m
- => WriterOptions -> String -> [String] -> [[Block]]
- -> OD m Doc
-tableRowToOpenDocument o tn ns cs =
- inTagsIndented "table:table-row" . vcat <$>
- mapM (tableItemToOpenDocument o tn) (zip ns cs)
-
-tableItemToOpenDocument :: PandocMonad m
- => WriterOptions -> String -> (String,[Block])
- -> OD m Doc
-tableItemToOpenDocument o tn (n,i) =
- let a = [ ("table:style-name" , tn ++ ".A1" )
- , ("office:value-type", "string" )
- ]
- in inTags True "table:table-cell" a <$>
- withParagraphStyle o n (map plainToPara i)
-
--- | Convert a list of inline elements to OpenDocument.
-inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc
-inlinesToOpenDocument o l = hcat <$> toChunks o l
-
-toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc]
-toChunks _ [] = return []
-toChunks o (x : xs)
- | isChunkable x = do
- contents <- (inTextStyle . hcat) =<<
- mapM (inlineToOpenDocument o) (x:ys)
- rest <- toChunks o zs
- return (contents : rest)
- | otherwise = do
- contents <- inlineToOpenDocument o x
- rest <- toChunks o xs
- return (contents : rest)
- where (ys, zs) = span isChunkable xs
-
-isChunkable :: Inline -> Bool
-isChunkable (Str _) = True
-isChunkable Space = True
-isChunkable SoftBreak = True
-isChunkable _ = False
-
--- | Convert an inline element to OpenDocument.
-inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc
-inlineToOpenDocument o ils
- = case ils of
- Space -> return space
- SoftBreak
- | writerWrapText o == WrapPreserve
- -> return $ preformatted "\n"
- | otherwise -> return $ space
- Span _ xs -> inlinesToOpenDocument o xs
- LineBreak -> return $ selfClosingTag "text:line-break" []
- Str s -> return $ handleSpaces $ escapeStringForXML s
- Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l
- Strong l -> withTextStyle Bold $ inlinesToOpenDocument o l
- Strikeout l -> withTextStyle Strike $ inlinesToOpenDocument o l
- Superscript l -> withTextStyle Sup $ inlinesToOpenDocument o l
- Subscript l -> withTextStyle Sub $ inlinesToOpenDocument o l
- SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l
- Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l
- Code _ s -> inlinedCode $ preformatted s
- Math t s -> lift (texMathToInlines t s) >>=
- inlinesToOpenDocument o
- Cite _ l -> inlinesToOpenDocument o l
- RawInline f s -> if f == Format "opendocument"
- then return $ text s
- else do
- report $ InlineNotRendered ils
- return empty
- Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l
- Image attr _ (s,t) -> mkImg attr s t
- Note l -> mkNote l
- where
- preformatted s = handleSpaces $ escapeStringForXML s
- inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s
- mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple")
- , ("xlink:href" , s )
- , ("office:name", t )
- ] . inSpanTags "Definition"
- mkImg (_, _, kvs) s _ = do
- id' <- gets stImageId
- modify (\st -> st{ stImageId = id' + 1 })
- let getDims [] = []
- getDims (("width", w) :xs) = ("svg:width", w) : getDims xs
- getDims (("height", h):xs) = ("svg:height", h) : getDims xs
- getDims (x@("style:rel-width", _) :xs) = x : getDims xs
- getDims (x@("style:rel-height", _):xs) = x : getDims xs
- getDims (_:xs) = getDims xs
- return $ inTags False "draw:frame"
- (("draw:name", "img" ++ show id') : getDims kvs) $
- selfClosingTag "draw:image" [ ("xlink:href" , s )
- , ("xlink:type" , "simple")
- , ("xlink:show" , "embed" )
- , ("xlink:actuate", "onLoad")]
- mkNote l = do
- n <- length <$> gets stNotes
- let footNote t = inTags False "text:note"
- [ ("text:id" , "ftn" ++ show n)
- , ("text:note-class", "footnote" )] $
- inTagsSimple "text:note-citation" (text . show $ n + 1) <>
- inTagsSimple "text:note-body" t
- nn <- footNote <$> withParagraphStyle o "Footnote" l
- addNote nn
- return nn
-
-bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc]))
-bulletListStyle l = do
- let doStyles i = inTags True "text:list-level-style-bullet"
- [ ("text:level" , show (i + 1) )
- , ("text:style-name" , "Bullet_20_Symbols")
- , ("style:num-suffix", "." )
- , ("text:bullet-char", [bulletList !! i] )
- ] (listLevelStyle (1 + i))
- bulletList = map chr $ cycle [8226,8227,8259]
- listElStyle = map doStyles [0..9]
- pn <- paraListStyle l
- return (pn, (l, listElStyle))
-
-orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc])
-orderedListLevelStyle (s,n, d) (l,ls) =
- let suffix = case d of
- OneParen -> [("style:num-suffix", ")")]
- TwoParens -> [("style:num-prefix", "(")
- ,("style:num-suffix", ")")]
- _ -> [("style:num-suffix", ".")]
- format = case n of
- UpperAlpha -> "A"
- LowerAlpha -> "a"
- UpperRoman -> "I"
- LowerRoman -> "i"
- _ -> "1"
- listStyle = inTags True "text:list-level-style-number"
- ([ ("text:level" , show $ 1 + length ls )
- , ("text:style-name" , "Numbering_20_Symbols")
- , ("style:num-format", format )
- , ("text:start-value", show s )
- ] ++ suffix) (listLevelStyle (1 + length ls))
- in (l, ls ++ [listStyle])
-
-listLevelStyle :: Int -> Doc
-listLevelStyle i =
- let indent = show (0.25 * fromIntegral i :: Double) in
- selfClosingTag "style:list-level-properties"
- [ ("text:space-before" , indent ++ "in")
- , ("text:min-label-width", "0.25in")]
-
-tableStyle :: Int -> [(Char,Double)] -> Doc
-tableStyle num wcs =
- let tableId = "Table" ++ show (num + 1)
- table = inTags True "style:style"
- [("style:name", tableId)
- ,("style:family", "table")] $
- selfClosingTag "style:table-properties"
- [("table:align" , "center")]
- colStyle (c,0) = selfClosingTag "style:style"
- [ ("style:name" , tableId ++ "." ++ [c])
- , ("style:family", "table-column" )]
- colStyle (c,w) = inTags True "style:style"
- [ ("style:name" , tableId ++ "." ++ [c])
- , ("style:family", "table-column" )] $
- selfClosingTag "style:table-column-properties"
- [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))]
- cellStyle = inTags True "style:style"
- [ ("style:name" , tableId ++ ".A1")
- , ("style:family", "table-cell" )] $
- selfClosingTag "style:table-cell-properties"
- [ ("fo:border", "none")]
- columnStyles = map colStyle wcs
- in table $$ vcat columnStyles $$ cellStyle
-
-paraStyle :: PandocMonad m => [(String,String)] -> OD m Int
-paraStyle attrs = do
- pn <- (+) 1 . length <$> gets stParaStyles
- i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara
- b <- gets stInDefinition
- t <- gets stTight
- let styleAttr = [ ("style:name" , "P" ++ show pn)
- , ("style:family" , "paragraph" )]
- indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i
- tight = if t then [ ("fo:margin-top" , "0in" )
- , ("fo:margin-bottom" , "0in" )]
- else []
- indent = if (i /= 0 || b)
- then [ ("fo:margin-left" , indentVal)
- , ("fo:margin-right" , "0in" )
- , ("fo:text-indent" , "0in" )
- , ("style:auto-text-indent" , "false" )]
- else []
- attributes = indent ++ tight
- paraProps = when (not $ null attributes) $
- selfClosingTag "style:paragraph-properties" attributes
- addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps
- return pn
-
-paraListStyle :: PandocMonad m => Int -> OD m Int
-paraListStyle l = paraStyle
- [("style:parent-style-name","Text_20_body")
- ,("style:list-style-name", "L" ++ show l )]
-
-paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)]
-paraTableStyles _ _ [] = []
-paraTableStyles t s (a:xs)
- | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs
- | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs
- | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs
- where pName sn = "P" ++ show (sn + 1)
- res sn x = inTags True "style:style"
- [ ("style:name" , pName sn )
- , ("style:family" , "paragraph" )
- , ("style:parent-style-name", "Table_20_" ++ t)] $
- selfClosingTag "style:paragraph-properties"
- [ ("fo:text-align", x)
- , ("style:justify-single-word", "false")]
-
-data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
- deriving ( Eq,Ord )
-
-textStyleAttr :: TextStyle -> [(String,String)]
-textStyleAttr s
- | Italic <- s = [("fo:font-style" ,"italic" )
- ,("style:font-style-asian" ,"italic" )
- ,("style:font-style-complex" ,"italic" )]
- | Bold <- s = [("fo:font-weight" ,"bold" )
- ,("style:font-weight-asian" ,"bold" )
- ,("style:font-weight-complex" ,"bold" )]
- | Strike <- s = [("style:text-line-through-style", "solid" )]
- | Sub <- s = [("style:text-position" ,"sub 58%" )]
- | Sup <- s = [("style:text-position" ,"super 58%" )]
- | SmallC <- s = [("fo:font-variant" ,"small-caps")]
- | Pre <- s = [("style:font-name" ,"Courier New")
- ,("style:font-name-asian" ,"Courier New")
- ,("style:font-name-complex" ,"Courier New")]
- | otherwise = []
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
deleted file mode 100644
index 55d3fe656..000000000
--- a/src/Text/Pandoc/Writers/Org.hs
+++ /dev/null
@@ -1,411 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
- Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>,
- and John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Org
- Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : Puneeth Chaganti <punchagan@gmail.com>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to Emacs Org-Mode.
-
-Org-Mode: <http://orgmode.org>
--}
-module Text.Pandoc.Writers.Org ( writeOrg) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Pretty
-import Text.Pandoc.Templates (renderTemplate')
-import Data.Char ( isAlphaNum, toLower )
-import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose )
-import Control.Monad.State
-import Text.Pandoc.Class (PandocMonad)
-
-data WriterState =
- WriterState { stNotes :: [[Block]]
- , stHasMath :: Bool
- , stOptions :: WriterOptions
- }
-
--- | Convert Pandoc to Org.
-writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeOrg opts document = return $
- let st = WriterState { stNotes = [],
- stHasMath = False,
- stOptions = opts }
- in evalState (pandocToOrg document) st
-
--- | Return Org representation of document.
-pandocToOrg :: Pandoc -> State WriterState String
-pandocToOrg (Pandoc meta blocks) = do
- opts <- liftM stOptions get
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToOrg)
- (fmap (render colwidth) . inlineListToOrg)
- meta
- body <- blockListToOrg blocks
- notes <- liftM (reverse . stNotes) get >>= notesToOrg
- hasMath <- liftM stHasMath get
- let main = render colwidth $ foldl ($+$) empty $ [body, notes]
- let context = defField "body" main
- $ defField "math" hasMath
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Return Org representation of notes.
-notesToOrg :: [[Block]] -> State WriterState Doc
-notesToOrg notes =
- mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
- return . vsep
-
--- | Return Org representation of a note.
-noteToOrg :: Int -> [Block] -> State WriterState Doc
-noteToOrg num note = do
- contents <- blockListToOrg note
- let marker = "[fn:" ++ show num ++ "] "
- return $ hang (length marker) (text marker) contents
-
--- | Escape special characters for Org.
-escapeString :: String -> String
-escapeString = escapeStringUsing $
- [ ('\x2014',"---")
- , ('\x2013',"--")
- , ('\x2019',"'")
- , ('\x2026',"...")
- ] ++ backslashEscapes "^_"
-
-isRawFormat :: Format -> Bool
-isRawFormat f =
- f == Format "latex" || f == Format "tex" || f == Format "org"
-
--- | Convert Pandoc block element to Org.
-blockToOrg :: Block -- ^ Block element
- -> State WriterState Doc
-blockToOrg Null = return empty
-blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do
- contents <- blockListToOrg bs
- let drawerNameTag = ":" <> text cls <> ":"
- let keys = vcat $ map (\(k,v) ->
- ":" <> text k <> ":"
- <> space <> text v) kvs
- let drawerEndTag = text ":END:"
- return $ drawerNameTag $$ cr $$ keys $$
- blankline $$ contents $$
- blankline $$ drawerEndTag $$
- blankline
-blockToOrg (Div attrs bs) = do
- contents <- blockListToOrg bs
- let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower
- return $ case attrs of
- ("", [], []) ->
- -- nullAttr, treat contents as if it wasn't wrapped
- blankline $$ contents $$ blankline
- (ident, [], []) ->
- -- only an id: add id as an anchor, unwrap the rest
- blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline
- (ident, classes, kv) ->
- -- if one class looks like the name of a greater block then output as
- -- such: The ID, if present, is added via the #+NAME keyword; other
- -- classes and key-value pairs are kept as #+ATTR_HTML attributes.
- let
- (blockTypeCand, classes') = partition isGreaterBlockClass classes
- in case blockTypeCand of
- (blockType:classes'') ->
- blankline $$ attrHtml (ident, classes'' <> classes', kv) $$
- "#+BEGIN_" <> text blockType $$ contents $$
- "#+END_" <> text blockType $$ blankline
- _ ->
- -- fallback: wrap in div tags
- let
- startTag = tagWithAttrs "div" attrs
- endTag = text "</div>"
- in blankline $$ "#+BEGIN_HTML" $$
- nest 2 startTag $$ "#+END_HTML" $$ blankline $$
- contents $$ blankline $$ "#+BEGIN_HTML" $$
- nest 2 endTag $$ "#+END_HTML" $$ blankline
-blockToOrg (Plain inlines) = inlineListToOrg inlines
--- title beginning with fig: indicates that the image is a figure
-blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- if null txt
- then return empty
- else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt
- img <- inlineToOrg (Image attr txt (src,tit))
- return $ capt $$ img $$ blankline
-blockToOrg (Para inlines) = do
- contents <- inlineListToOrg inlines
- return $ contents <> blankline
-blockToOrg (LineBlock lns) = do
- let splitStanza [] = []
- splitStanza xs = case break (== mempty) xs of
- (l, []) -> l : []
- (l, _:r) -> l : splitStanza r
- let joinWithLinefeeds = nowrap . mconcat . intersperse cr
- let joinWithBlankLines = mconcat . intersperse blankline
- let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls
- contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns)
- return $ blankline $$ "#+BEGIN_VERSE" $$
- nest 2 contents $$ "#+END_VERSE" <> blankline
-blockToOrg (RawBlock "html" str) =
- return $ blankline $$ "#+BEGIN_HTML" $$
- nest 2 (text str) $$ "#+END_HTML" $$ blankline
-blockToOrg (RawBlock f str) | isRawFormat f =
- return $ text str
-blockToOrg (RawBlock _ _) = return empty
-blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
-blockToOrg (Header level attr inlines) = do
- contents <- inlineListToOrg inlines
- let headerStr = text $ if level > 999 then " " else replicate level '*'
- let drawerStr = if attr == nullAttr
- then empty
- else cr <> nest (level + 1) (propertiesDrawer attr)
- return $ headerStr <> " " <> contents <> drawerStr <> blankline
-blockToOrg (CodeBlock (_,classes,_) str) = do
- opts <- stOptions <$> get
- let tabstop = writerTabStop opts
- let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers
- let (beg, end) = case at of
- [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE")
- (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC")
- return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline
-blockToOrg (BlockQuote blocks) = do
- contents <- blockListToOrg blocks
- return $ blankline $$ "#+BEGIN_QUOTE" $$
- nest 2 contents $$ "#+END_QUOTE" $$ blankline
-blockToOrg (Table caption' _ _ headers rows) = do
- caption'' <- inlineListToOrg caption'
- let caption = if null caption'
- then empty
- else ("#+CAPTION: " <> caption'')
- headers' <- mapM blockListToOrg headers
- rawRows <- mapM (mapM blockListToOrg) rows
- let numChars = maximum . map offset
- -- FIXME: width is not being used.
- let widthsInChars =
- map ((+2) . numChars) $ transpose (headers' : rawRows)
- -- FIXME: Org doesn't allow blocks with height more than 1.
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = maximum (1 : map height blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- rows' <- mapM (\row -> do cols <- mapM blockListToOrg row
- return $ makeRow cols) rows
- let border ch = char '|' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '|'
- let body = vcat rows'
- let head'' = if all null headers
- then empty
- else head' $$ border '-'
- return $ head'' $$ body $$ caption $$ blankline
-blockToOrg (BulletList items) = do
- contents <- mapM bulletListItemToOrg items
- -- ensure that sublists have preceding blank line
- return $ blankline $+$ vcat contents $$ blankline
-blockToOrg (OrderedList (start, _, delim) items) = do
- let delim' = case delim of
- TwoParens -> OneParen
- x -> x
- let markers = take (length items) $ orderedListMarkers
- (start, Decimal, delim')
- let maxMarkerLength = maximum $ map length markers
- let markers' = map (\m -> let s = maxMarkerLength - length m
- in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $
- zip markers' items
- -- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
-blockToOrg (DefinitionList items) = do
- contents <- mapM definitionListItemToOrg items
- return $ vcat contents $$ blankline
-
--- | Convert bullet list item (list of blocks) to Org.
-bulletListItemToOrg :: [Block] -> State WriterState Doc
-bulletListItemToOrg items = do
- contents <- blockListToOrg items
- return $ hang 2 "- " (contents <> cr)
-
--- | Convert ordered list item (a list of blocks) to Org.
-orderedListItemToOrg :: String -- ^ marker for list item
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToOrg marker items = do
- contents <- blockListToOrg items
- return $ hang (length marker + 1) (text marker <> space) (contents <> cr)
-
--- | Convert defintion list item (label, list of blocks) to Org.
-definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc
-definitionListItemToOrg (label, defs) = do
- label' <- inlineListToOrg label
- contents <- liftM vcat $ mapM blockListToOrg defs
- return $ hang 2 "- " $ label' <> " :: " <> (contents <> cr)
-
--- | Convert list of key/value pairs to Org :PROPERTIES: drawer.
-propertiesDrawer :: Attr -> Doc
-propertiesDrawer (ident, classes, kv) =
- let
- drawerStart = text ":PROPERTIES:"
- drawerEnd = text ":END:"
- kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv
- kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv'
- properties = vcat $ map kvToOrgProperty kv''
- in
- drawerStart <> cr <> properties <> cr <> drawerEnd
- where
- kvToOrgProperty :: (String, String) -> Doc
- kvToOrgProperty (key, value) =
- text ":" <> text key <> text ": " <> text value <> cr
-
-attrHtml :: Attr -> Doc
-attrHtml ("" , [] , []) = mempty
-attrHtml (ident, classes, kvs) =
- let
- name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr
- keyword = "#+ATTR_HTML"
- classKv = ("class", unwords classes)
- kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs)
- in name <> keyword <> ": " <> text (unwords kvStrings) <> cr
-
--- | Convert list of Pandoc block elements to Org.
-blockListToOrg :: [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat
-
--- | Convert list of Pandoc inline elements to Org.
-inlineListToOrg :: [Inline] -> State WriterState Doc
-inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
-
--- | Convert Pandoc inline element to Org.
-inlineToOrg :: Inline -> State WriterState Doc
-inlineToOrg (Span (uid, [], []) []) =
- return $ "<<" <> text uid <> ">>"
-inlineToOrg (Span _ lst) =
- inlineListToOrg lst
-inlineToOrg (Emph lst) = do
- contents <- inlineListToOrg lst
- return $ "/" <> contents <> "/"
-inlineToOrg (Strong lst) = do
- contents <- inlineListToOrg lst
- return $ "*" <> contents <> "*"
-inlineToOrg (Strikeout lst) = do
- contents <- inlineListToOrg lst
- return $ "+" <> contents <> "+"
-inlineToOrg (Superscript lst) = do
- contents <- inlineListToOrg lst
- return $ "^{" <> contents <> "}"
-inlineToOrg (Subscript lst) = do
- contents <- inlineListToOrg lst
- return $ "_{" <> contents <> "}"
-inlineToOrg (SmallCaps lst) = inlineListToOrg lst
-inlineToOrg (Quoted SingleQuote lst) = do
- contents <- inlineListToOrg lst
- return $ "'" <> contents <> "'"
-inlineToOrg (Quoted DoubleQuote lst) = do
- contents <- inlineListToOrg lst
- return $ "\"" <> contents <> "\""
-inlineToOrg (Cite _ lst) = inlineListToOrg lst
-inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
-inlineToOrg (Str str) = return $ text $ escapeString str
-inlineToOrg (Math t str) = do
- modify $ \st -> st{ stHasMath = True }
- return $ if t == InlineMath
- then "$" <> text str <> "$"
- else "$$" <> text str <> "$$"
-inlineToOrg (RawInline f@(Format f') str) =
- return $ if isRawFormat f
- then text str
- else "@@" <> text f' <> ":" <> text str <> "@@"
-inlineToOrg LineBreak = return (text "\\\\" <> cr)
-inlineToOrg Space = return space
-inlineToOrg SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- case wrapText of
- WrapPreserve -> return cr
- WrapAuto -> return space
- WrapNone -> return space
-inlineToOrg (Link _ txt (src, _)) = do
- case txt of
- [Str x] | escapeURI x == src -> -- autolink
- do return $ "[[" <> text (orgPath x) <> "]]"
- _ -> do contents <- inlineListToOrg txt
- return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]"
-inlineToOrg (Image _ _ (source, _)) = do
- return $ "[[" <> text (orgPath source) <> "]]"
-inlineToOrg (Note contents) = do
- -- add to notes in state
- notes <- get >>= (return . stNotes)
- modify $ \st -> st { stNotes = contents:notes }
- let ref = show $ (length notes) + 1
- return $ "[fn:" <> text ref <> "]"
-
-orgPath :: String -> String
-orgPath src =
- case src of
- [] -> mempty -- wiki link
- ('#':_) -> src -- internal link
- _ | isUrl src -> src
- _ | isFilePath src -> src
- _ -> "file:" <> src
- where
- isFilePath :: String -> Bool
- isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"]
-
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
--- | Translate from pandoc's programming language identifiers to those used by
--- org-mode.
-pandocLangToOrg :: String -> String
-pandocLangToOrg cs =
- case cs of
- "c" -> "C"
- "cpp" -> "C++"
- "commonlisp" -> "lisp"
- "r" -> "R"
- "bash" -> "sh"
- _ -> cs
-
--- | List of language identifiers recognized by org-mode.
-orgLangIdentifiers :: [String]
-orgLangIdentifiers =
- [ "asymptote", "awk", "C", "C++", "clojure", "css", "d", "ditaa", "dot"
- , "calc", "emacs-lisp", "fortran", "gnuplot", "haskell", "java", "js"
- , "latex", "ledger", "lisp", "lilypond", "matlab", "mscgen", "ocaml"
- , "octave", "org", "oz", "perl", "plantuml", "processing", "python", "R"
- , "ruby", "sass", "scheme", "screen", "sed", "sh", "sql", "sqlite"
- ]
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
deleted file mode 100644
index 5cce64d17..000000000
--- a/src/Text/Pandoc/Writers/RST.hs
+++ /dev/null
@@ -1,556 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.RST
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to reStructuredText.
-
-reStructuredText: <http://docutils.sourceforge.net/rst.html>
--}
-module Text.Pandoc.Writers.RST ( writeRST ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Builder (deleteMeta)
-import Data.Maybe (fromMaybe)
-import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose )
-import Network.URI (isURI)
-import Text.Pandoc.Pretty
-import Control.Monad.State
-import Data.Char (isSpace, toLower)
-import Text.Pandoc.Class (PandocMonad)
-
-type Refs = [([Inline], Target)]
-
-data WriterState =
- WriterState { stNotes :: [[Block]]
- , stLinks :: Refs
- , stImages :: [([Inline], (Attr, String, String, Maybe String))]
- , stHasMath :: Bool
- , stHasRawTeX :: Bool
- , stOptions :: WriterOptions
- , stTopLevel :: Bool
- }
-
--- | Convert Pandoc to RST.
-writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeRST opts document = return $
- let st = WriterState { stNotes = [], stLinks = [],
- stImages = [], stHasMath = False,
- stHasRawTeX = False, stOptions = opts,
- stTopLevel = True}
- in evalState (pandocToRST document) st
-
--- | Return RST representation of document.
-pandocToRST :: Pandoc -> State WriterState String
-pandocToRST (Pandoc meta blocks) = do
- opts <- liftM stOptions get
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- let subtit = case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> xs
- _ -> []
- title <- titleToRST (docTitle meta) subtit
- metadata <- metaToJSON opts
- (fmap (render colwidth) . blockListToRST)
- (fmap (trimr . render colwidth) . inlineListToRST)
- $ deleteMeta "title" $ deleteMeta "subtitle" meta
- body <- blockListToRST' True $ case writerTemplate opts of
- Just _ -> normalizeHeadings 1 blocks
- Nothing -> blocks
- notes <- liftM (reverse . stNotes) get >>= notesToRST
- -- note that the notes may contain refs, so we do them first
- refs <- liftM (reverse . stLinks) get >>= refsToRST
- pics <- liftM (reverse . stImages) get >>= pictRefsToRST
- hasMath <- liftM stHasMath get
- rawTeX <- liftM stHasRawTeX get
- let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
- let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ defField "toc-depth" (show $ writerTOCDepth opts)
- $ defField "math" hasMath
- $ defField "title" (render Nothing title :: String)
- $ defField "math" hasMath
- $ defField "rawtex" rawTeX
- $ metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
- where
- normalizeHeadings lev (Header l a i:bs) =
- Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
- where (cont,bs') = break (headerLtEq l) bs
- headerLtEq level (Header l' _ _) = l' <= level
- headerLtEq _ _ = False
- normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs
- normalizeHeadings _ [] = []
-
--- | Return RST representation of reference key table.
-refsToRST :: Refs -> State WriterState Doc
-refsToRST refs = mapM keyToRST refs >>= return . vcat
-
--- | Return RST representation of a reference key.
-keyToRST :: ([Inline], (String, String))
- -> State WriterState Doc
-keyToRST (label, (src, _)) = do
- label' <- inlineListToRST label
- let label'' = if ':' `elem` ((render Nothing label') :: String)
- then char '`' <> label' <> char '`'
- else label'
- return $ nowrap $ ".. _" <> label'' <> ": " <> text src
-
--- | Return RST representation of notes.
-notesToRST :: [[Block]] -> State WriterState Doc
-notesToRST notes =
- mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
- return . vsep
-
--- | Return RST representation of a note.
-noteToRST :: Int -> [Block] -> State WriterState Doc
-noteToRST num note = do
- contents <- blockListToRST note
- let marker = ".. [" <> text (show num) <> "]"
- return $ nowrap $ marker $$ nest 3 contents
-
--- | Return RST representation of picture reference table.
-pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))]
- -> State WriterState Doc
-pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
-
--- | Return RST representation of a picture substitution reference.
-pictToRST :: ([Inline], (Attr, String, String, Maybe String))
- -> State WriterState Doc
-pictToRST (label, (attr, src, _, mbtarget)) = do
- label' <- inlineListToRST label
- dims <- imageDimsToRST attr
- let (_, cls, _) = attr
- classes = if null cls
- then empty
- else ":class: " <> text (unwords cls)
- return $ nowrap
- $ ".. |" <> label' <> "| image:: " <> text src $$ hang 3 empty (classes $$ dims)
- $$ case mbtarget of
- Nothing -> empty
- Just t -> " :target: " <> text t
-
--- | Escape special characters for RST.
-escapeString :: WriterOptions -> String -> String
-escapeString _ [] = []
-escapeString opts (c:cs) =
- case c of
- _ | c `elem` ['\\','`','*','_','|'] -> '\\':c:escapeString opts cs
- '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
- '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
- '-' | isEnabled Ext_smart opts ->
- case cs of
- '-':_ -> '\\':'-':escapeString opts cs
- _ -> '-':escapeString opts cs
- '.' | isEnabled Ext_smart opts ->
- case cs of
- '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest
- _ -> '.':escapeString opts cs
- _ -> c : escapeString opts cs
-
-titleToRST :: [Inline] -> [Inline] -> State WriterState Doc
-titleToRST [] _ = return empty
-titleToRST tit subtit = do
- title <- inlineListToRST tit
- subtitle <- inlineListToRST subtit
- return $ bordered title '=' $$ bordered subtitle '-'
-
-bordered :: Doc -> Char -> Doc
-bordered contents c =
- if len > 0
- then border $$ contents $$ border
- else empty
- where len = offset contents
- border = text (replicate len c)
-
--- | Convert Pandoc block element to RST.
-blockToRST :: Block -- ^ Block element
- -> State WriterState Doc
-blockToRST Null = return empty
-blockToRST (Div attr bs) = do
- contents <- blockListToRST bs
- let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr)
- let endTag = ".. raw:: html" $+$ nest 3 "</div>"
- return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline
-blockToRST (Plain inlines) = inlineListToRST inlines
--- title beginning with fig: indicates that the image is a figure
-blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- inlineListToRST txt
- dims <- imageDimsToRST attr
- let fig = "figure:: " <> text src
- alt = ":alt: " <> if null tit then capt else text tit
- (_,cls,_) = attr
- classes = if null cls
- then empty
- else ":figclass: " <> text (unwords cls)
- return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline
-blockToRST (Para inlines)
- | LineBreak `elem` inlines = do -- use line block if LineBreaks
- linesToLineBlock $ splitBy (==LineBreak) inlines
- | otherwise = do
- contents <- inlineListToRST inlines
- return $ contents <> blankline
-blockToRST (LineBlock lns) =
- linesToLineBlock lns
-blockToRST (RawBlock f@(Format f') str)
- | f == "rst" = return $ text str
- | otherwise = return $ blankline <> ".. raw:: " <>
- text (map toLower f') $+$
- (nest 3 $ text str) $$ blankline
-blockToRST HorizontalRule =
- return $ blankline $$ "--------------" $$ blankline
-blockToRST (Header level (name,classes,_) inlines) = do
- contents <- inlineListToRST inlines
- isTopLevel <- gets stTopLevel
- if isTopLevel
- then do
- let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
- let border = text $ replicate (offset contents) headerChar
- return $ nowrap $ contents $$ border $$ blankline
- else do
- let rub = "rubric:: " <> contents
- let name' | null name = empty
- | otherwise = ":name: " <> text name
- let cls | null classes = empty
- | otherwise = ":class: " <> text (unwords classes)
- return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline
-blockToRST (CodeBlock (_,classes,kvs) str) = do
- opts <- stOptions <$> get
- let tabstop = writerTabStop opts
- let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs
- let numberlines = if "numberLines" `elem` classes
- then " :number-lines:" <> startnum
- else empty
- if "haskell" `elem` classes && "literate" `elem` classes &&
- isEnabled Ext_literate_haskell opts
- then return $ prefixed "> " (text str) $$ blankline
- else return $
- (case [c | c <- classes,
- c `notElem` ["sourceCode","literate","numberLines"]] of
- [] -> "::"
- (lang:_) -> (".. code:: " <> text lang) $$ numberlines)
- $+$ nest tabstop (text str) $$ blankline
-blockToRST (BlockQuote blocks) = do
- tabstop <- get >>= (return . writerTabStop . stOptions)
- contents <- blockListToRST blocks
- return $ (nest tabstop contents) <> blankline
-blockToRST (Table caption _ widths headers rows) = do
- caption' <- inlineListToRST caption
- headers' <- mapM blockListToRST headers
- rawRows <- mapM (mapM blockListToRST) rows
- -- let isSimpleCell [Plain _] = True
- -- isSimpleCell [Para _] = True
- -- isSimpleCell [] = True
- -- isSimpleCell _ = False
- -- let isSimple = all (==0) widths && all (all isSimpleCell) rows
- let numChars = maximum . map offset
- opts <- get >>= return . stOptions
- let widthsInChars =
- if all (== 0) widths
- then map ((+2) . numChars) $ transpose (headers' : rawRows)
- else map (floor . (fromIntegral (writerColumns opts) *)) widths
- let hpipeBlocks blocks = hcat [beg, middle, end]
- where h = height (hcat blocks)
- sep' = lblock 3 $ vcat (map text $ replicate h " | ")
- beg = lblock 2 $ vcat (map text $ replicate h "| ")
- end = lblock 2 $ vcat (map text $ replicate h " |")
- middle = hcat $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith lblock widthsInChars
- let head' = makeRow headers'
- let rows' = map makeRow rawRows
- let border ch = char '+' <> char ch <>
- (hcat $ intersperse (char ch <> char '+' <> char ch) $
- map (\l -> text $ replicate l ch) widthsInChars) <>
- char ch <> char '+'
- let body = vcat $ intersperse (border '-') rows'
- let head'' = if all null headers
- then empty
- else head' $$ border '='
- let tbl = border '-' $$ head'' $$ body $$ border '-'
- return $ if null caption
- then tbl $$ blankline
- else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$
- blankline
-blockToRST (BulletList items) = do
- contents <- mapM bulletListItemToRST items
- -- ensure that sublists have preceding blank line
- return $ blankline $$ chomp (vcat contents) $$ blankline
-blockToRST (OrderedList (start, style', delim) items) = do
- let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
- then take (length items) $ repeat "#."
- else take (length items) $ orderedListMarkers
- (start, style', delim)
- let maxMarkerLength = maximum $ map length markers
- let markers' = map (\m -> let s = maxMarkerLength - length m
- in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
- zip markers' items
- -- ensure that sublists have preceding blank line
- return $ blankline $$ chomp (vcat contents) $$ blankline
-blockToRST (DefinitionList items) = do
- contents <- mapM definitionListItemToRST items
- -- ensure that sublists have preceding blank line
- return $ blankline $$ chomp (vcat contents) $$ blankline
-
--- | Convert bullet list item (list of blocks) to RST.
-bulletListItemToRST :: [Block] -> State WriterState Doc
-bulletListItemToRST items = do
- contents <- blockListToRST items
- return $ hang 3 "- " $ contents <> cr
-
--- | Convert ordered list item (a list of blocks) to RST.
-orderedListItemToRST :: String -- ^ marker for list item
- -> [Block] -- ^ list item (list of blocks)
- -> State WriterState Doc
-orderedListItemToRST marker items = do
- contents <- blockListToRST items
- let marker' = marker ++ " "
- return $ hang (length marker') (text marker') $ contents <> cr
-
--- | Convert defintion list item (label, list of blocks) to RST.
-definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc
-definitionListItemToRST (label, defs) = do
- label' <- inlineListToRST label
- contents <- liftM vcat $ mapM blockListToRST defs
- tabstop <- get >>= (return . writerTabStop . stOptions)
- return $ label' $$ nest tabstop (nestle contents <> cr)
-
--- | Format a list of lines as line block.
-linesToLineBlock :: [[Inline]] -> State WriterState Doc
-linesToLineBlock inlineLines = do
- lns <- mapM inlineListToRST inlineLines
- return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline
-
--- | Convert list of Pandoc block elements to RST.
-blockListToRST' :: Bool
- -> [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToRST' topLevel blocks = do
- tl <- gets stTopLevel
- modify (\s->s{stTopLevel=topLevel})
- res <- vcat `fmap` mapM blockToRST blocks
- modify (\s->s{stTopLevel=tl})
- return res
-
-blockListToRST :: [Block] -- ^ List of block elements
- -> State WriterState Doc
-blockListToRST = blockListToRST' False
-
--- | Convert list of Pandoc inline elements to RST.
-inlineListToRST :: [Inline] -> State WriterState Doc
-inlineListToRST lst =
- mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
- return . hcat
- where -- remove spaces after displaymath, as they screw up indentation:
- removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
- Math DisplayMath x : dropWhile (==Space) zs
- removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs
- removeSpaceAfterDisplayMath [] = []
- insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
- insertBS (x:y:z:zs)
- | isComplex y && (surroundComplex x z) =
- x : y : insertBS (z : zs)
- insertBS (x:y:zs)
- | isComplex x && not (okAfterComplex y) =
- x : RawInline "rst" "\\ " : insertBS (y : zs)
- | isComplex y && not (okBeforeComplex x) =
- x : RawInline "rst" "\\ " : insertBS (y : zs)
- | otherwise =
- x : insertBS (y : zs)
- insertBS (x:ys) = x : insertBS ys
- insertBS [] = []
- surroundComplex :: Inline -> Inline -> Bool
- surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =
- case (last s, head s') of
- ('\'','\'') -> True
- ('"','"') -> True
- ('<','>') -> True
- ('[',']') -> True
- ('{','}') -> True
- _ -> False
- surroundComplex _ _ = False
- okAfterComplex :: Inline -> Bool
- okAfterComplex Space = True
- okAfterComplex SoftBreak = True
- okAfterComplex LineBreak = True
- okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String)
- okAfterComplex _ = False
- okBeforeComplex :: Inline -> Bool
- okBeforeComplex Space = True
- okBeforeComplex SoftBreak = True
- okBeforeComplex LineBreak = True
- okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String)
- okBeforeComplex _ = False
- isComplex :: Inline -> Bool
- isComplex (Emph _) = True
- isComplex (Strong _) = True
- isComplex (SmallCaps _) = True
- isComplex (Strikeout _) = True
- isComplex (Superscript _) = True
- isComplex (Subscript _) = True
- isComplex (Link _ _ _) = True
- isComplex (Image _ _ _) = True
- isComplex (Code _ _) = True
- isComplex (Math _ _) = True
- isComplex (Cite _ (x:_)) = isComplex x
- isComplex (Span _ (x:_)) = isComplex x
- isComplex _ = False
-
--- | Convert Pandoc inline element to RST.
-inlineToRST :: Inline -> State WriterState Doc
-inlineToRST (Span _ ils) = inlineListToRST ils
-inlineToRST (Emph lst) = do
- contents <- inlineListToRST lst
- return $ "*" <> contents <> "*"
-inlineToRST (Strong lst) = do
- contents <- inlineListToRST lst
- return $ "**" <> contents <> "**"
-inlineToRST (Strikeout lst) = do
- contents <- inlineListToRST lst
- return $ "[STRIKEOUT:" <> contents <> "]"
-inlineToRST (Superscript lst) = do
- contents <- inlineListToRST lst
- return $ ":sup:`" <> contents <> "`"
-inlineToRST (Subscript lst) = do
- contents <- inlineListToRST lst
- return $ ":sub:`" <> contents <> "`"
-inlineToRST (SmallCaps lst) = inlineListToRST lst
-inlineToRST (Quoted SingleQuote lst) = do
- contents <- inlineListToRST lst
- opts <- gets stOptions
- if isEnabled Ext_smart opts
- then return $ "'" <> contents <> "'"
- else return $ "‘" <> contents <> "’"
-inlineToRST (Quoted DoubleQuote lst) = do
- contents <- inlineListToRST lst
- opts <- gets stOptions
- if isEnabled Ext_smart opts
- then return $ "\"" <> contents <> "\""
- else return $ "“" <> contents <> "”"
-inlineToRST (Cite _ lst) =
- inlineListToRST lst
-inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
-inlineToRST (Str str) = do
- opts <- gets stOptions
- return $ text $
- (if isEnabled Ext_smart opts
- then unsmartify opts
- else id) $ escapeString opts str
-inlineToRST (Math t str) = do
- modify $ \st -> st{ stHasMath = True }
- return $ if t == InlineMath
- then ":math:`" <> text str <> "`"
- else if '\n' `elem` str
- then blankline $$ ".. math::" $$
- blankline $$ nest 3 (text str) $$ blankline
- else blankline $$ (".. math:: " <> text str) $$ blankline
-inlineToRST (RawInline f x)
- | f == "rst" = return $ text x
- | f == "latex" || f == "tex" = do
- modify $ \st -> st{ stHasRawTeX = True }
- return $ ":raw-latex:`" <> text x <> "`"
- | otherwise = return empty
-inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
-inlineToRST Space = return space
-inlineToRST SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- case wrapText of
- WrapPreserve -> return cr
- WrapAuto -> return space
- WrapNone -> return space
--- autolink
-inlineToRST (Link _ [Str str] (src, _))
- | isURI src &&
- if "mailto:" `isPrefixOf` src
- then src == escapeURI ("mailto:" ++ str)
- else src == escapeURI str = do
- let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
- return $ text srcSuffix
-inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do
- label <- registerImage attr alt (imgsrc,imgtit) (Just src)
- return $ "|" <> label <> "|"
-inlineToRST (Link _ txt (src, tit)) = do
- useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
- linktext <- inlineListToRST $ normalizeSpaces txt
- if useReferenceLinks
- then do refs <- get >>= return . stLinks
- case lookup txt refs of
- Just (src',tit') ->
- if src == src' && tit == tit'
- then return $ "`" <> linktext <> "`_"
- else do -- duplicate label, use non-reference link
- return $ "`" <> linktext <> " <" <> text src <> ">`__"
- Nothing -> do
- modify $ \st -> st { stLinks = (txt,(src,tit)):refs }
- return $ "`" <> linktext <> "`_"
- else return $ "`" <> linktext <> " <" <> text src <> ">`__"
-inlineToRST (Image attr alternate (source, tit)) = do
- label <- registerImage attr alternate (source,tit) Nothing
- return $ "|" <> label <> "|"
-inlineToRST (Note contents) = do
- -- add to notes in state
- notes <- gets stNotes
- modify $ \st -> st { stNotes = contents:notes }
- let ref = show $ (length notes) + 1
- return $ " [" <> text ref <> "]_"
-
-registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc
-registerImage attr alt (src,tit) mbtarget = do
- pics <- get >>= return . stImages
- txt <- case lookup alt pics of
- Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget)
- -> return alt
- _ -> do
- let alt' = if null alt || alt == [Str ""]
- then [Str $ "image" ++ show (length pics)]
- else alt
- modify $ \st -> st { stImages =
- (alt', (attr,src,tit, mbtarget)):stImages st }
- return alt'
- inlineListToRST txt
-
-imageDimsToRST :: Attr -> State WriterState Doc
-imageDimsToRST attr = do
- let (ident, _, _) = attr
- name = if null ident
- then empty
- else ":name: " <> text ident
- showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d)
- in case (dimension dir attr) of
- Just (Percent a) ->
- case dir of
- Height -> empty
- Width -> cols (Percent a)
- Just dim -> cols dim
- Nothing -> empty
- return $ cr <> name $$ showDim Width $$ showDim Height
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
deleted file mode 100644
index ef012e58e..000000000
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ /dev/null
@@ -1,412 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.RTF
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to RTF (rich text format).
--}
-module Text.Pandoc.Writers.RTF ( writeRTF
- ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Writers.Math
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Walk
-import Text.Pandoc.Logging
-import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, chr, isDigit )
-import qualified Data.ByteString as B
-import qualified Data.Map as M
-import Text.Printf ( printf )
-import Text.Pandoc.ImageSize
-import Control.Monad.Except (throwError, runExceptT, lift)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import qualified Text.Pandoc.Class as P
-
--- | Convert Image inlines into a raw RTF embedded image, read from a file,
--- or a MediaBag, or the internet.
--- If file not found or filetype not jpeg or png, leave the inline unchanged.
-rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline
-rtfEmbedImage opts x@(Image attr _ (src,_)) = do
- result <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
- case result of
- Right (imgdata, Just mime)
- | mime == "image/jpeg" || mime == "image/png" -> do
- let bytes = map (printf "%02x") $ B.unpack imgdata
- filetype <- case mime of
- "image/jpeg" -> return "\\jpegblip"
- "image/png" -> return "\\pngblip"
- _ -> throwError $ PandocSomeError "Unknown file type"
- sizeSpec <- case imageSize imgdata of
- Left msg -> do
- report $ CouldNotDetermineImageSize src msg
- return ""
- Right sz -> return $ "\\picw" ++ show xpx ++
- "\\pich" ++ show ypx ++
- "\\picwgoal" ++ show (floor (xpt * 20) :: Integer)
- ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer)
- -- twip = 1/1440in = 1/20pt
- where (xpx, ypx) = sizeInPixels sz
- (xpt, ypt) = desiredSizeInPoints opts attr sz
- let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++
- concat bytes ++ "}"
- if B.null imgdata
- then do
- report $ CouldNotFetchResource src "image contained no data"
- return x
- else return $ RawInline (Format "rtf") raw
- | otherwise -> do
- report $ CouldNotFetchResource src "image is not a jpeg or png"
- return x
- Right (_, Nothing) -> do
- report $ CouldNotDetermineMimeType src
- return x
- Left ( e :: PandocError ) -> do
- report $ CouldNotFetchResource src (show e)
- return x
-rtfEmbedImage _ x = return x
-
--- | Convert Pandoc to a string in rich text format.
-writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeRTF options doc = do
- -- handle images
- Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc
- let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta
- let toPlain (MetaBlocks [Para ils]) = MetaInlines ils
- toPlain x = x
- -- adjust title, author, date so we don't get para inside para
- let meta' = Meta $ M.adjust toPlain "title"
- . M.adjust toPlain "author"
- . M.adjust toPlain "date"
- $ metamap
- metadata <- metaToJSON options
- (fmap concat . mapM (blockToRTF 0 AlignDefault))
- (inlinesToRTF)
- meta'
- body <- blocksToRTF 0 AlignDefault blocks
- let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options
- isTOCHeader _ = False
- toc <- tableOfContents $ filter isTOCHeader blocks
- let context = defField "body" body
- $ defField "spacer" spacer
- $ (if writerTableOfContents options
- then defField "toc" toc
- else id)
- $ metadata
- return $ case writerTemplate options of
- Just tpl -> renderTemplate' tpl context
- Nothing -> case reverse body of
- ('\n':_) -> body
- _ -> body ++ "\n"
-
--- | Construct table of contents from list of header blocks.
-tableOfContents :: PandocMonad m => [Block] -> m String
-tableOfContents headers = do
- let contents = map elementToListItem $ hierarchicalize headers
- blocksToRTF 0 AlignDefault $
- [Header 1 nullAttr [Str "Contents"], BulletList contents]
-
-elementToListItem :: Element -> [Block]
-elementToListItem (Blk _) = []
-elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++
- if null subsecs
- then []
- else [BulletList (map elementToListItem subsecs)]
-
--- | Convert unicode characters (> 127) into rich text format representation.
-handleUnicode :: String -> String
-handleUnicode [] = []
-handleUnicode (c:cs) =
- if ord c > 127
- then if surrogate c
- then let x = ord c - 0x10000
- (q, r) = x `divMod` 0x400
- upper = q + 0xd800
- lower = r + 0xDC00
- in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs
- else enc c ++ handleUnicode cs
- else c:(handleUnicode cs)
- where
- surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff)
- || (0xe000 <= ord x && ord x <= 0xffff) )
- enc x = '\\':'u':(show (ord x)) ++ "?"
-
--- | Escape special characters.
-escapeSpecial :: String -> String
-escapeSpecial = escapeStringUsing $
- [ ('\t',"\\tab ")
- , ('\8216',"\\u8216'")
- , ('\8217',"\\u8217'")
- , ('\8220',"\\u8220\"")
- , ('\8221',"\\u8221\"")
- , ('\8211',"\\u8211-")
- , ('\8212',"\\u8212-")
- ] ++ backslashEscapes "{\\}"
-
--- | Escape strings as needed for rich text format.
-stringToRTF :: String -> String
-stringToRTF = handleUnicode . escapeSpecial
-
--- | Escape things as needed for code block in RTF.
-codeStringToRTF :: String -> String
-codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str)
-
--- | Make a paragraph with first-line indent, block indent, and space after.
-rtfParSpaced :: Int -- ^ space after (in twips)
- -> Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfParSpaced spaceAfter indent firstLineIndent alignment content =
- let alignString = case alignment of
- AlignLeft -> "\\ql "
- AlignRight -> "\\qr "
- AlignCenter -> "\\qc "
- AlignDefault -> "\\ql "
- in "{\\pard " ++ alignString ++
- "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
- " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
-
--- | Default paragraph.
-rtfPar :: Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfPar = rtfParSpaced 180
-
--- | Compact paragraph (e.g. for compact list items).
-rtfCompact :: Int -- ^ block indent (in twips)
- -> Int -- ^ first line indent (relative to block) (in twips)
- -> Alignment -- ^ alignment
- -> String -- ^ string with content
- -> String
-rtfCompact = rtfParSpaced 0
-
--- number of twips to indent
-indentIncrement :: Int
-indentIncrement = 720
-
-listIncrement :: Int
-listIncrement = 360
-
--- | Returns appropriate bullet list marker for indent level.
-bulletMarker :: Int -> String
-bulletMarker indent = case indent `mod` 720 of
- 0 -> "\\bullet "
- _ -> "\\endash "
-
--- | Returns appropriate (list of) ordered list markers for indent level.
-orderedMarkers :: Int -> ListAttributes -> [String]
-orderedMarkers indent (start, style, delim) =
- if style == DefaultStyle && delim == DefaultDelim
- then case indent `mod` 720 of
- 0 -> orderedListMarkers (start, Decimal, Period)
- _ -> orderedListMarkers (start, LowerAlpha, Period)
- else orderedListMarkers (start, style, delim)
-
-blocksToRTF :: PandocMonad m
- => Int
- -> Alignment
- -> [Block]
- -> m String
-blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align)
-
--- | Convert Pandoc block element to RTF.
-blockToRTF :: PandocMonad m
- => Int -- ^ indent level
- -> Alignment -- ^ alignment
- -> Block -- ^ block to convert
- -> m String
-blockToRTF _ _ Null = return ""
-blockToRTF indent alignment (Div _ bs) =
- blocksToRTF indent alignment bs
-blockToRTF indent alignment (Plain lst) =
- rtfCompact indent 0 alignment <$> inlinesToRTF lst
-blockToRTF indent alignment (Para lst) =
- rtfPar indent 0 alignment <$> inlinesToRTF lst
-blockToRTF indent alignment (LineBlock lns) =
- blockToRTF indent alignment $ linesToPara lns
-blockToRTF indent alignment (BlockQuote lst) =
- blocksToRTF (indent + indentIncrement) alignment lst
-blockToRTF indent _ (CodeBlock _ str) =
- return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF _ _ b@(RawBlock f str)
- | f == Format "rtf" = return str
- | otherwise = do
- report $ BlockNotRendered b
- return ""
-blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$>
- mapM (listItemToRTF alignment indent (bulletMarker indent)) lst
-blockToRTF indent alignment (OrderedList attribs lst) =
- (spaceAtEnd . concat) <$>
- mapM (\(x,y) -> listItemToRTF alignment indent x y)
- (zip (orderedMarkers indent attribs) lst)
-blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$>
- mapM (definitionListItemToRTF alignment indent) lst
-blockToRTF indent _ HorizontalRule = return $
- rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
-blockToRTF indent alignment (Header level _ lst) = do
- contents <- inlinesToRTF lst
- return $ rtfPar indent 0 alignment $
- "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ contents
-blockToRTF indent alignment (Table caption aligns sizes headers rows) = do
- caption' <- inlinesToRTF caption
- header' <- if all null headers
- then return ""
- else tableRowToRTF True indent aligns sizes headers
- rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows
- return $ header' ++ rows' ++ rtfPar indent 0 alignment caption'
-
-tableRowToRTF :: PandocMonad m
- => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String
-tableRowToRTF header indent aligns sizes' cols = do
- let totalTwips = 6 * 1440 -- 6 inches
- let sizes = if all (== 0) sizes'
- then take (length cols) $ repeat (1.0 / fromIntegral (length cols))
- else sizes'
- columns <- concat <$> mapM (\(x,y) -> tableItemToRTF indent x y)
- (zip aligns cols)
- let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
- (0 :: Integer) sizes
- let cellDefs = map (\edge -> (if header
- then "\\clbrdrb\\brdrs"
- else "") ++ "\\cellx" ++ show edge)
- rightEdges
- let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
- "\\trkeep\\intbl\n{\n"
- let end = "}\n\\intbl\\row}\n"
- return $ start ++ columns ++ end
-
-tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String
-tableItemToRTF indent alignment item = do
- contents <- blocksToRTF indent alignment item
- return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n"
-
--- | Ensure that there's the same amount of space after compact
--- lists as after regular lists.
-spaceAtEnd :: String -> String
-spaceAtEnd str =
- if isSuffixOf "\\par}\n" str
- then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
- else str
-
--- | Convert list item (list of blocks) to RTF.
-listItemToRTF :: PandocMonad m
- => Alignment -- ^ alignment
- -> Int -- ^ indent level
- -> String -- ^ list start marker
- -> [Block] -- ^ list item (list of blocks)
- -> m String
-listItemToRTF alignment indent marker [] = return $
- rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
- (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
-listItemToRTF alignment indent marker list = do
- (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list
- let listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++
- "\\tx" ++ show listIncrement ++ "\\tab"
- let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
- listMarker ++ dropWhile isDigit xs
- insertListMarker ('\\':'f':'i':d:xs) | isDigit d =
- listMarker ++ dropWhile isDigit xs
- insertListMarker (x:xs) =
- x : insertListMarker xs
- insertListMarker [] = []
- -- insert the list marker into the (processed) first block
- return $ insertListMarker first ++ concat rest
-
--- | Convert definition list item (label, list of blocks) to RTF.
-definitionListItemToRTF :: PandocMonad m
- => Alignment -- ^ alignment
- -> Int -- ^ indent level
- -> ([Inline],[[Block]]) -- ^ list item (list of blocks)
- -> m String
-definitionListItemToRTF alignment indent (label, defs) = do
- labelText <- blockToRTF indent alignment (Plain label)
- itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs)
- return $ labelText ++ itemsText
-
--- | Convert list of inline items to RTF.
-inlinesToRTF :: PandocMonad m
- => [Inline] -- ^ list of inlines to convert
- -> m String
-inlinesToRTF lst = concat <$> mapM inlineToRTF lst
-
--- | Convert inline item to RTF.
-inlineToRTF :: PandocMonad m
- => Inline -- ^ inline to convert
- -> m String
-inlineToRTF (Span _ lst) = inlinesToRTF lst
-inlineToRTF (Emph lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\i " ++ contents ++ "}"
-inlineToRTF (Strong lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\b " ++ contents ++ "}"
-inlineToRTF (Strikeout lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\strike " ++ contents ++ "}"
-inlineToRTF (Superscript lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\super " ++ contents ++ "}"
-inlineToRTF (Subscript lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\sub " ++ contents ++ "}"
-inlineToRTF (SmallCaps lst) = do
- contents <- inlinesToRTF lst
- return $ "{\\scaps " ++ contents ++ "}"
-inlineToRTF (Quoted SingleQuote lst) = do
- contents <- inlinesToRTF lst
- return $ "\\u8216'" ++ contents ++ "\\u8217'"
-inlineToRTF (Quoted DoubleQuote lst) = do
- contents <- inlinesToRTF lst
- return $ "\\u8220\"" ++ contents ++ "\\u8221\""
-inlineToRTF (Code _ str) = return $ "{\\f1 " ++ (codeStringToRTF str) ++ "}"
-inlineToRTF (Str str) = return $ stringToRTF str
-inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF
-inlineToRTF (Cite _ lst) = inlinesToRTF lst
-inlineToRTF il@(RawInline f str)
- | f == Format "rtf" = return str
- | otherwise = do
- return $ InlineNotRendered il
- return ""
-inlineToRTF (LineBreak) = return "\\line "
-inlineToRTF SoftBreak = return " "
-inlineToRTF Space = return " "
-inlineToRTF (Link _ text (src, _)) = do
- contents <- inlinesToRTF text
- return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
- "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n"
-inlineToRTF (Image _ _ (source, _)) =
- return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
-inlineToRTF (Note contents) = do
- body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents
- return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
- body ++ "}"
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
deleted file mode 100644
index 89a826269..000000000
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ /dev/null
@@ -1,183 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Shared
- Copyright : Copyright (C) 2013-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Shared utility functions for pandoc writers.
--}
-module Text.Pandoc.Writers.Shared (
- metaToJSON
- , getField
- , setField
- , defField
- , tagWithAttrs
- , fixDisplayMath
- , unsmartify
- )
-where
-import Text.Pandoc.Definition
-import Text.Pandoc.Pretty
-import Text.Pandoc.Options
-import Text.Pandoc.XML (escapeStringForXML)
-import Control.Monad (liftM)
-import qualified Data.HashMap.Strict as H
-import qualified Data.Map as M
-import qualified Data.Text as T
-import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode)
-import Text.Pandoc.UTF8 (toStringLazy)
-import qualified Data.Traversable as Traversable
-import Data.List ( groupBy )
-import Data.Maybe ( isJust )
-
--- | Create JSON value for template from a 'Meta' and an association list
--- of variables, specified at the command line or in the writer.
--- Variables overwrite metadata fields with the same names.
--- If multiple variables are set with the same name, a list is
--- assigned.
-metaToJSON :: Monad m
- => WriterOptions
- -> ([Block] -> m String)
- -> ([Inline] -> m String)
- -> Meta
- -> m Value
-metaToJSON opts blockWriter inlineWriter (Meta metamap)
- | isJust (writerTemplate opts) = do
- let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty)
- $ writerVariables opts
- renderedMap <- Traversable.mapM
- (metaValueToJSON blockWriter inlineWriter)
- metamap
- let metadata = M.foldWithKey defField baseContext renderedMap
- return $ defField "meta-json" (toStringLazy $ encode metadata) metadata
- | otherwise = return (Object H.empty)
-
-metaValueToJSON :: Monad m
- => ([Block] -> m String)
- -> ([Inline] -> m String)
- -> MetaValue
- -> m Value
-metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $
- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap
-metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $
- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs
-metaValueToJSON _ _ (MetaBool b) = return $ toJSON b
-metaValueToJSON _ _ (MetaString s) = return $ toJSON s
-metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs
-metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs
-
--- | Retrieve a field value from a JSON object.
-getField :: FromJSON a
- => String
- -> Value
- -> Maybe a
-getField field (Object hashmap) = do
- result <- H.lookup (T.pack field) hashmap
- case fromJSON result of
- Success x -> return x
- _ -> fail "Could not convert from JSON"
-getField _ _ = fail "Not a JSON object"
-
-setField :: ToJSON a
- => String
- -> a
- -> Value
- -> Value
--- | Set a field of a JSON object. If the field already has a value,
--- convert it into a list with the new value appended to the old value(s).
--- This is a utility function to be used in preparing template contexts.
-setField field val (Object hashmap) =
- Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap
- where combine newval oldval =
- case fromJSON oldval of
- Success xs -> toJSON $ xs ++ [newval]
- _ -> toJSON [oldval, newval]
-setField _ _ x = x
-
-defField :: ToJSON a
- => String
- -> a
- -> Value
- -> Value
--- | Set a field of a JSON object if it currently has no value.
--- If it has a value, do nothing.
--- This is a utility function to be used in preparing template contexts.
-defField field val (Object hashmap) =
- Object $ H.insertWith f (T.pack field) (toJSON val) hashmap
- where f _newval oldval = oldval
-defField _ _ x = x
-
--- Produce an HTML tag with the given pandoc attributes.
-tagWithAttrs :: String -> Attr -> Doc
-tagWithAttrs tag (ident,classes,kvs) = hsep
- ["<" <> text tag
- ,if null ident
- then empty
- else "id=" <> doubleQuotes (text ident)
- ,if null classes
- then empty
- else "class=" <> doubleQuotes (text (unwords classes))
- ,hsep (map (\(k,v) -> text k <> "=" <>
- doubleQuotes (text (escapeStringForXML v))) kvs)
- ] <> ">"
-
-isDisplayMath :: Inline -> Bool
-isDisplayMath (Math DisplayMath _) = True
-isDisplayMath _ = False
-
-stripLeadingTrailingSpace :: [Inline] -> [Inline]
-stripLeadingTrailingSpace = go . reverse . go . reverse
- where go (Space:xs) = xs
- go (SoftBreak:xs) = xs
- go xs = xs
-
--- Put display math in its own block (for ODT/DOCX).
-fixDisplayMath :: Block -> Block
-fixDisplayMath (Plain lst)
- | any isDisplayMath lst && not (all isDisplayMath lst) =
- -- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
- groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
- not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath (Para lst)
- | any isDisplayMath lst && not (all isDisplayMath lst) =
- -- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
- groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
- not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath x = x
-
-unsmartify :: WriterOptions -> String -> String
-unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs
-unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs
-unsmartify opts ('\8211':xs)
- | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs
- | otherwise = "--" ++ unsmartify opts xs
-unsmartify opts ('\8212':xs)
- | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs
- | otherwise = "---" ++ unsmartify opts xs
-unsmartify opts (x:xs) = x : unsmartify opts xs
-unsmartify _ [] = []
-
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
deleted file mode 100644
index a54d42c53..000000000
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ /dev/null
@@ -1,324 +0,0 @@
-{-# LANGUAGE OverloadedStrings, PatternGuards #-}
-{-
-Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Docbook
- Copyright : Copyright (C) 2006-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to Docbook XML.
--}
-module Text.Pandoc.Writers.TEI (writeTEI) where
-import Text.Pandoc.Definition
-import Text.Pandoc.XML
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Templates (renderTemplate')
-import Data.List ( stripPrefix, isPrefixOf )
-import Data.Char ( toLower )
-import Text.Pandoc.Highlighting ( languages, languagesByExtension )
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class ( PandocMonad )
-
--- | Convert list of authors to a docbook <author> section
-authorToTEI :: WriterOptions -> [Inline] -> B.Inlines
-authorToTEI opts name' =
- let name = render Nothing $ inlinesToTEI opts name'
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- in B.rawInline "tei" $ render colwidth $
- inTagsSimple "author" (text $ escapeStringForXML name)
-
--- | Convert Pandoc document to string in Docbook format.
-writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeTEI opts (Pandoc meta blocks) = return $
- let elements = hierarchicalize blocks
- colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- render' = render colwidth
- startLvl = case writerTopLevelDivision opts of
- TopLevelPart -> -1
- TopLevelChapter -> 0
- TopLevelSection -> 1
- TopLevelDefault -> 1
- auths' = map (authorToTEI opts) $ docAuthors meta
- meta' = B.setMeta "author" auths' meta
- Just metadata = metaToJSON opts
- (Just . render colwidth . (vcat .
- (map (elementToTEI opts startLvl)) . hierarchicalize))
- (Just . render colwidth . inlinesToTEI opts)
- meta'
- main = render' $ vcat (map (elementToTEI opts startLvl) elements)
- context = defField "body" main
- $ defField "mathml" (case writerHTMLMathMethod opts of
- MathML -> True
- _ -> False)
- $ metadata
- in case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
-
--- | Convert an Element to TEI.
-elementToTEI :: WriterOptions -> Int -> Element -> Doc
-elementToTEI opts _ (Blk block) = blockToTEI opts block
-elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) =
- -- TEI doesn't allow sections with no content, so insert some if needed
- let elements' = if null elements
- then [Blk (Para [])]
- else elements
- -- level numbering correspond to LaTeX internals
- divType = case lvl of
- n | n == -1 -> "part"
- | n == 0 -> "chapter"
- | n >= 1 && n <= 5 -> "level" ++ show n
- | otherwise -> "section"
- in inTags True "div" [("type", divType) | not (null id')] $
--- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $
- inTagsSimple "head" (inlinesToTEI opts title) $$
- vcat (map (elementToTEI opts (lvl + 1)) elements')
-
--- | Convert a list of Pandoc blocks to TEI.
-blocksToTEI :: WriterOptions -> [Block] -> Doc
-blocksToTEI opts = vcat . map (blockToTEI opts)
-
--- | Auxiliary function to convert Plain block to Para.
-plainToPara :: Block -> Block
-plainToPara (Plain x) = Para x
-plainToPara x = x
-
--- | Convert a list of pairs of terms and definitions into a TEI
--- list with labels and items.
-deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc
-deflistItemsToTEI opts items =
- vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items
-
--- | Convert a term and a list of blocks into a TEI varlistentry.
-deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc
-deflistItemToTEI opts term defs =
- let def' = concatMap (map plainToPara) defs
- in inTagsIndented "label" (inlinesToTEI opts term) $$
- inTagsIndented "item" (blocksToTEI opts def')
-
--- | Convert a list of lists of blocks to a list of TEI list items.
-listItemsToTEI :: WriterOptions -> [[Block]] -> Doc
-listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items
-
--- | Convert a list of blocks into a TEI list item.
-listItemToTEI :: WriterOptions -> [Block] -> Doc
-listItemToTEI opts item =
- inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item
-
-imageToTEI :: WriterOptions -> Attr -> String -> Doc
-imageToTEI _ attr src = selfClosingTag "graphic" $
- ("url", src) : idAndRole attr ++ dims
- where
- dims = go Width "width" ++ go Height "depth"
- go dir dstr = case (dimension dir attr) of
- Just a -> [(dstr, show a)]
- Nothing -> []
-
--- | Convert a Pandoc block element to TEI.
-blockToTEI :: WriterOptions -> Block -> Doc
-blockToTEI _ Null = empty
--- Add ids to paragraphs in divs with ids - this is needed for
--- pandoc-citeproc to get link anchors in bibliographies:
-blockToTEI opts (Div (ident,_,_) [Para lst]) =
- let attribs = [("id", ident) | not (null ident)] in
- inTags False "p" attribs $ inlinesToTEI opts lst
-blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
-blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize
--- For TEI simple, text must be within containing block element, so
--- we use plainToPara to ensure that Plain text ends up contained by
--- something.
-blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
--- title beginning with fig: indicates that the image is a figure
---blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
--- let alt = inlinesToTEI opts txt
--- capt = if null txt
--- then empty
--- else inTagsSimple "title" alt
--- in inTagsIndented "figure" $
--- capt $$
--- (inTagsIndented "mediaobject" $
--- (inTagsIndented "imageobject"
--- (imageToTEI opts attr src)) $$
--- inTagsSimple "textobject" (inTagsSimple "phrase" alt))
-blockToTEI opts (Para lst) =
- inTags False "p" [] $ inlinesToTEI opts lst
-blockToTEI opts (LineBlock lns) =
- blockToTEI opts $ linesToPara lns
-blockToTEI opts (BlockQuote blocks) =
- inTagsIndented "quote" $ blocksToTEI opts blocks
-blockToTEI _ (CodeBlock (_,classes,_) str) =
- text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
- flush (text (escapeStringForXML str) <> cr <> text "</ab>")
- where lang = if null langs
- then ""
- else escapeStringForXML (head langs)
- isLang l = map toLower l `elem` map (map toLower) languages
- langsFrom s = if isLang s
- then [s]
- else languagesByExtension . map toLower $ s
- langs = concatMap langsFrom classes
-blockToTEI opts (BulletList lst) =
- let attribs = [("type", "unordered")]
- in inTags True "list" attribs $ listItemsToTEI opts lst
-blockToTEI _ (OrderedList _ []) = empty
-blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) =
- let attribs = case numstyle of
- DefaultStyle -> []
- Decimal -> [("type", "ordered:arabic")]
- Example -> [("type", "ordered:arabic")]
- UpperAlpha -> [("type", "ordered:upperalpha")]
- LowerAlpha -> [("type", "ordered:loweralpha")]
- UpperRoman -> [("type", "ordered:upperroman")]
- LowerRoman -> [("type", "ordered:lowerroman")]
- items = if start == 1
- then listItemsToTEI opts (first:rest)
- else (inTags True "item" [("n",show start)]
- (blocksToTEI opts $ map plainToPara first)) $$
- listItemsToTEI opts rest
- in inTags True "list" attribs items
-blockToTEI opts (DefinitionList lst) =
- let attribs = [("type", "definition")]
- in inTags True "list" attribs $ deflistItemsToTEI opts lst
-blockToTEI _ (RawBlock f str)
- | f == "tei" = text str -- raw TEI block (should such a thing exist).
--- | f == "html" = text str -- allow html for backwards compatibility
- | otherwise = empty
-blockToTEI _ HorizontalRule =
- selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")]
-
--- | TEI Tables
--- TEI Simple's tables are composed of cells and rows; other
--- table info in the AST is here lossily discard.
-blockToTEI opts (Table _ _ _ headers rows) =
- let
- headers' = tableHeadersToTEI opts headers
--- headers' = if all null headers
--- then return empty
--- else tableRowToTEI opts headers
- in
- inTags True "table" [] $
- vcat $ [headers'] <> map (tableRowToTEI opts) rows
-
-tableRowToTEI :: WriterOptions
- -> [[Block]]
- -> Doc
-tableRowToTEI opts cols =
- inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols
-
-tableHeadersToTEI :: WriterOptions
- -> [[Block]]
- -> Doc
-tableHeadersToTEI opts cols =
- inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols
-
-tableItemToTEI :: WriterOptions
- -> [Block]
- -> Doc
-tableItemToTEI opts item =
- inTags False "cell" [] $ vcat $ map (blockToTEI opts) item
-
--- | Convert a list of inline elements to TEI.
-inlinesToTEI :: WriterOptions -> [Inline] -> Doc
-inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst
-
--- | Convert an inline element to TEI.
-inlineToTEI :: WriterOptions -> Inline -> Doc
-inlineToTEI _ (Str str) = text $ escapeStringForXML str
-inlineToTEI opts (Emph lst) =
- inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst
-inlineToTEI opts (Strong lst) =
- inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst
-inlineToTEI opts (Strikeout lst) =
- inTags False "hi" [("rendition", "simple:strikethrough")] $
- inlinesToTEI opts lst
-inlineToTEI opts (Superscript lst) =
- inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst
-inlineToTEI opts (Subscript lst) =
- inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst
-inlineToTEI opts (SmallCaps lst) =
- inTags False "hi" [("rendition", "simple:smallcaps")] $
- inlinesToTEI opts lst
-inlineToTEI opts (Quoted _ lst) =
- inTagsSimple "quote" $ inlinesToTEI opts lst
-inlineToTEI opts (Cite _ lst) =
- inlinesToTEI opts lst
-inlineToTEI opts (Span _ ils) =
- inlinesToTEI opts ils
-inlineToTEI _ (Code _ str) =
- inTags False "seg" [("type","code")] $ text (escapeStringForXML str)
--- Distinguish display from inline math by wrapping the former in a "figure."
-inlineToTEI _ (Math t str) =
- case t of
- InlineMath -> inTags False "formula" [("notation","TeX")] $
- text (str)
- DisplayMath -> inTags True "figure" [("type","math")] $
- inTags False "formula" [("notation","TeX")] $ text (str)
-
-inlineToTEI _ (RawInline f x) | f == "tei" = text x
- | otherwise = empty
-inlineToTEI _ LineBreak = selfClosingTag "lb" []
-inlineToTEI _ Space = space
--- because we use \n for LineBreak, we can't do soft breaks:
-inlineToTEI _ SoftBreak = space
-inlineToTEI opts (Link attr txt (src, _))
- | Just email <- stripPrefix "mailto:" src =
- let emailLink = text $
- escapeStringForXML $ email
- in case txt of
- [Str s] | escapeURI s == email -> emailLink
- _ -> inlinesToTEI opts txt <+>
- char '(' <> emailLink <> char ')'
- | otherwise =
- (if isPrefixOf "#" src
- then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr
- else inTags False "ref" $ ("target", src) : idAndRole attr ) $
- inlinesToTEI opts txt
-inlineToTEI opts (Image attr description (src, tit)) =
- let titleDoc = if null tit
- then empty
- else inTags False "figDesc" [] (text $ escapeStringForXML tit)
- imageDesc = if null description
- then empty
- else inTags False "head" [] (inlinesToTEI opts description)
- in inTagsIndented "figure" $ imageDesc $$
- imageToTEI opts attr src $$ titleDoc
-inlineToTEI opts (Note contents) =
- inTagsIndented "note" $ blocksToTEI opts contents
-
-idAndRole :: Attr -> [(String, String)]
-idAndRole (id',cls,_) = ident ++ role
- where
- ident = if null id'
- then []
- else [("id", id')]
- role = if null cls
- then []
- else [("role", unwords cls)]
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
deleted file mode 100644
index fe6024351..000000000
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ /dev/null
@@ -1,498 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2008-2015 John MacFarlane and Peter Wang
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Texinfo
- Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' format into Texinfo.
--}
-module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Printf ( printf )
-import Data.List ( transpose, maximumBy )
-import Data.Ord ( comparing )
-import Data.Char ( chr, ord )
-import Control.Monad.State
-import Text.Pandoc.Pretty
-import Text.Pandoc.ImageSize
-import Network.URI ( isURI, unEscapeString )
-import System.FilePath
-import qualified Data.Set as Set
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
-
-data WriterState =
- WriterState { stStrikeout :: Bool -- document contains strikeout
- , stSuperscript :: Bool -- document contains superscript
- , stSubscript :: Bool -- document contains subscript
- , stEscapeComma :: Bool -- in a context where we need @comma
- , stIdentifiers :: Set.Set String -- header ids used already
- , stOptions :: WriterOptions -- writer options
- }
-
-{- TODO:
- - internal cross references a la HTML
- - generated .texi files don't work when run through texi2dvi
- -}
-
-type TI m = StateT WriterState m
-
--- | Convert Pandoc to Texinfo.
-writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeTexinfo options document =
- evalStateT (pandocToTexinfo options $ wrapTop document) $
- WriterState { stStrikeout = False, stSuperscript = False,
- stEscapeComma = False, stSubscript = False,
- stIdentifiers = Set.empty, stOptions = options}
-
--- | Add a "Top" node around the document, needed by Texinfo.
-wrapTop :: Pandoc -> Pandoc
-wrapTop (Pandoc meta blocks) =
- Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks)
-
-pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m String
-pandocToTexinfo options (Pandoc meta blocks) = do
- let titlePage = not $ all null
- $ docTitle meta : docDate meta : docAuthors meta
- let colwidth = if writerWrapText options == WrapAuto
- then Just $ writerColumns options
- else Nothing
- metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToTexinfo)
- (fmap (render colwidth) . inlineListToTexinfo)
- meta
- main <- blockListToTexinfo blocks
- st <- get
- let body = render colwidth main
- let context = defField "body" body
- $ defField "toc" (writerTableOfContents options)
- $ defField "titlepage" titlePage
- $ defField "subscript" (stSubscript st)
- $ defField "superscript" (stSuperscript st)
- $ defField "strikeout" (stStrikeout st)
- $ metadata
- case writerTemplate options of
- Nothing -> return body
- Just tpl -> return $ renderTemplate' tpl context
-
--- | Escape things as needed for Texinfo.
-stringToTexinfo :: String -> String
-stringToTexinfo = escapeStringUsing texinfoEscapes
- where texinfoEscapes = [ ('{', "@{")
- , ('}', "@}")
- , ('@', "@@")
- , ('\160', "@ ")
- , ('\x2014', "---")
- , ('\x2013', "--")
- , ('\x2026', "@dots{}")
- , ('\x2019', "'")
- ]
-
-escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc
-escapeCommas parser = do
- oldEscapeComma <- gets stEscapeComma
- modify $ \st -> st{ stEscapeComma = True }
- res <- parser
- modify $ \st -> st{ stEscapeComma = oldEscapeComma }
- return res
-
--- | Puts contents into Texinfo command.
-inCmd :: String -> Doc -> Doc
-inCmd cmd contents = char '@' <> text cmd <> braces contents
-
--- | Convert Pandoc block element to Texinfo.
-blockToTexinfo :: PandocMonad m
- => Block -- ^ Block to convert
- -> TI m Doc
-
-blockToTexinfo Null = return empty
-
-blockToTexinfo (Div _ bs) = blockListToTexinfo bs
-
-blockToTexinfo (Plain lst) =
- inlineListToTexinfo lst
-
--- title beginning with fig: indicates that the image is a figure
-blockToTexinfo (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- if null txt
- then return empty
- else (\c -> text "@caption" <> braces c) `fmap`
- inlineListToTexinfo txt
- img <- inlineToTexinfo (Image attr txt (src,tit))
- return $ text "@float" $$ img $$ capt $$ text "@end float"
-
-blockToTexinfo (Para lst) =
- inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo
-
-blockToTexinfo (LineBlock lns) =
- blockToTexinfo $ linesToPara lns
-
-blockToTexinfo (BlockQuote lst) = do
- contents <- blockListToTexinfo lst
- return $ text "@quotation" $$
- contents $$
- text "@end quotation"
-
-blockToTexinfo (CodeBlock _ str) = do
- return $ blankline $$
- text "@verbatim" $$
- flush (text str) $$
- text "@end verbatim" <> blankline
-
-blockToTexinfo b@(RawBlock f str)
- | f == "texinfo" = return $ text str
- | f == "latex" || f == "tex" =
- return $ text "@tex" $$ text str $$ text "@end tex"
- | otherwise = do
- report $ BlockNotRendered b
- return empty
-
-blockToTexinfo (BulletList lst) = do
- items <- mapM listItemToTexinfo lst
- return $ text "@itemize" $$
- vcat items $$
- text "@end itemize" <> blankline
-
-blockToTexinfo (OrderedList (start, numstyle, _) lst) = do
- items <- mapM listItemToTexinfo lst
- return $ text "@enumerate " <> exemplar $$
- vcat items $$
- text "@end enumerate" <> blankline
- where
- exemplar = case numstyle of
- DefaultStyle -> decimal
- Decimal -> decimal
- Example -> decimal
- UpperRoman -> decimal -- Roman numerals not supported
- LowerRoman -> decimal
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- decimal = if start == 1
- then empty
- else text (show start)
- upperAlpha = text [chr $ ord 'A' + start - 1]
- lowerAlpha = text [chr $ ord 'a' + start - 1]
-
-blockToTexinfo (DefinitionList lst) = do
- items <- mapM defListItemToTexinfo lst
- return $ text "@table @asis" $$
- vcat items $$
- text "@end table" <> blankline
-
-blockToTexinfo HorizontalRule =
- -- XXX can't get the equivalent from LaTeX.hs to work
- return $ text "@iftex" $$
- text "@bigskip@hrule@bigskip" $$
- text "@end iftex" $$
- text "@ifnottex" $$
- text (take 72 $ repeat '-') $$
- text "@end ifnottex"
-
-blockToTexinfo (Header 0 _ lst) = do
- txt <- if null lst
- then return $ text "Top"
- else inlineListToTexinfo lst
- return $ text "@node Top" $$
- text "@top " <> txt <> blankline
-
-blockToTexinfo (Header level _ lst)
- | level < 1 || level > 4 = blockToTexinfo (Para lst)
- | otherwise = do
- node <- inlineListForNode lst
- txt <- inlineListToTexinfo lst
- idsUsed <- gets stIdentifiers
- let id' = uniqueIdent lst idsUsed
- modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
- sec <- seccmd level
- return $ if (level > 0) && (level <= 4)
- then blankline <> text "@node " <> node $$
- text sec <> txt $$
- text "@anchor" <> braces (text $ '#':id')
- else txt
- where
- seccmd :: PandocMonad m => Int -> TI m String
- seccmd 1 = return "@chapter "
- seccmd 2 = return "@section "
- seccmd 3 = return "@subsection "
- seccmd 4 = return "@subsubsection "
- seccmd _ = throwError $ PandocSomeError "illegal seccmd level"
-
-blockToTexinfo (Table caption aligns widths heads rows) = do
- headers <- if all null heads
- then return empty
- else tableHeadToTexinfo aligns heads
- captionText <- inlineListToTexinfo caption
- rowsText <- mapM (tableRowToTexinfo aligns) rows
- colDescriptors <-
- if all (== 0) widths
- then do -- use longest entry instead of column widths
- cols <- mapM (mapM (liftM (render Nothing . hcat) . mapM blockToTexinfo)) $
- transpose $ heads : rows
- return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols
- else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
- let tableBody = text ("@multitable " ++ colDescriptors) $$
- headers $$
- vcat rowsText $$
- text "@end multitable"
- return $ if isEmpty captionText
- then tableBody <> blankline
- else text "@float" $$
- tableBody $$
- inCmd "caption" captionText $$
- text "@end float"
-
-tableHeadToTexinfo :: PandocMonad m
- => [Alignment]
- -> [[Block]]
- -> TI m Doc
-tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem "
-
-tableRowToTexinfo :: PandocMonad m
- => [Alignment]
- -> [[Block]]
- -> TI m Doc
-tableRowToTexinfo = tableAnyRowToTexinfo "@item "
-
-tableAnyRowToTexinfo :: PandocMonad m
- => String
- -> [Alignment]
- -> [[Block]]
- -> TI m Doc
-tableAnyRowToTexinfo itemtype aligns cols =
- zipWithM alignedBlock aligns cols >>=
- return . (text itemtype $$) . foldl (\row item -> row $$
- (if isEmpty row then empty else text " @tab ") <> item) empty
-
-alignedBlock :: PandocMonad m
- => Alignment
- -> [Block]
- -> TI m Doc
--- XXX @flushleft and @flushright text won't get word wrapped. Since word
--- wrapping is more important than alignment, we ignore the alignment.
-alignedBlock _ = blockListToTexinfo
-{-
-alignedBlock AlignLeft col = do
- b <- blockListToTexinfo col
- return $ text "@flushleft" $$ b $$ text "@end flushleft"
-alignedBlock AlignRight col = do
- b <- blockListToTexinfo col
- return $ text "@flushright" $$ b $$ text "@end flushright"
-alignedBlock _ col = blockListToTexinfo col
--}
-
--- | Convert Pandoc block elements to Texinfo.
-blockListToTexinfo :: PandocMonad m
- => [Block]
- -> TI m Doc
-blockListToTexinfo [] = return empty
-blockListToTexinfo (x:xs) = do
- x' <- blockToTexinfo x
- case x of
- Header level _ _ -> do
- -- We need need to insert a menu for this node.
- let (before, after) = break isHeaderBlock xs
- before' <- blockListToTexinfo before
- let menu = if level < 4
- then collectNodes (level + 1) after
- else []
- lines' <- mapM makeMenuLine menu
- let menu' = if null lines'
- then empty
- else text "@menu" $$
- vcat lines' $$
- text "@end menu"
- after' <- blockListToTexinfo after
- return $ x' $$ before' $$ menu' $$ after'
- Para _ -> do
- xs' <- blockListToTexinfo xs
- case xs of
- ((CodeBlock _ _):_) -> return $ x' $$ xs'
- _ -> return $ x' $+$ xs'
- _ -> do
- xs' <- blockListToTexinfo xs
- return $ x' $$ xs'
-
-collectNodes :: Int -> [Block] -> [Block]
-collectNodes _ [] = []
-collectNodes level (x:xs) =
- case x of
- (Header hl _ _) ->
- if hl < level
- then []
- else if hl == level
- then x : collectNodes level xs
- else collectNodes level xs
- _ ->
- collectNodes level xs
-
-makeMenuLine :: PandocMonad m
- => Block
- -> TI m Doc
-makeMenuLine (Header _ _ lst) = do
- txt <- inlineListForNode lst
- return $ text "* " <> txt <> text "::"
-makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block"
-
-listItemToTexinfo :: PandocMonad m
- => [Block]
- -> TI m Doc
-listItemToTexinfo lst = do
- contents <- blockListToTexinfo lst
- let spacer = case reverse lst of
- (Para{}:_) -> blankline
- _ -> empty
- return $ text "@item" $$ contents <> spacer
-
-defListItemToTexinfo :: PandocMonad m
- => ([Inline], [[Block]])
- -> TI m Doc
-defListItemToTexinfo (term, defs) = do
- term' <- inlineListToTexinfo term
- let defToTexinfo bs = do d <- blockListToTexinfo bs
- case reverse bs of
- (Para{}:_) -> return $ d <> blankline
- _ -> return d
- defs' <- mapM defToTexinfo defs
- return $ text "@item " <> term' $+$ vcat defs'
-
--- | Convert list of inline elements to Texinfo.
-inlineListToTexinfo :: PandocMonad m
- => [Inline] -- ^ Inlines to convert
- -> TI m Doc
-inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
-
--- | Convert list of inline elements to Texinfo acceptable for a node name.
-inlineListForNode :: PandocMonad m
- => [Inline] -- ^ Inlines to convert
- -> TI m Doc
-inlineListForNode = return . text . stringToTexinfo .
- filter (not . disallowedInNode) . stringify
-
--- periods, commas, colons, and parentheses are disallowed in node names
-disallowedInNode :: Char -> Bool
-disallowedInNode c = c `elem` (".,:()" :: String)
-
--- | Convert inline element to Texinfo
-inlineToTexinfo :: PandocMonad m
- => Inline -- ^ Inline to convert
- -> TI m Doc
-
-inlineToTexinfo (Span _ lst) =
- inlineListToTexinfo lst
-
-inlineToTexinfo (Emph lst) =
- inlineListToTexinfo lst >>= return . inCmd "emph"
-
-inlineToTexinfo (Strong lst) =
- inlineListToTexinfo lst >>= return . inCmd "strong"
-
-inlineToTexinfo (Strikeout lst) = do
- modify $ \st -> st{ stStrikeout = True }
- contents <- inlineListToTexinfo lst
- return $ text "@textstrikeout{" <> contents <> text "}"
-
-inlineToTexinfo (Superscript lst) = do
- modify $ \st -> st{ stSuperscript = True }
- contents <- inlineListToTexinfo lst
- return $ text "@textsuperscript{" <> contents <> char '}'
-
-inlineToTexinfo (Subscript lst) = do
- modify $ \st -> st{ stSubscript = True }
- contents <- inlineListToTexinfo lst
- return $ text "@textsubscript{" <> contents <> char '}'
-
-inlineToTexinfo (SmallCaps lst) =
- inlineListToTexinfo lst >>= return . inCmd "sc"
-
-inlineToTexinfo (Code _ str) = do
- return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
-
-inlineToTexinfo (Quoted SingleQuote lst) = do
- contents <- inlineListToTexinfo lst
- return $ char '`' <> contents <> char '\''
-
-inlineToTexinfo (Quoted DoubleQuote lst) = do
- contents <- inlineListToTexinfo lst
- return $ text "``" <> contents <> text "''"
-
-inlineToTexinfo (Cite _ lst) =
- inlineListToTexinfo lst
-inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
-inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
-inlineToTexinfo il@(RawInline f str)
- | f == "latex" || f == "tex" =
- return $ text "@tex" $$ text str $$ text "@end tex"
- | f == "texinfo" = return $ text str
- | otherwise = do
- report $ InlineNotRendered il
- return empty
-inlineToTexinfo (LineBreak) = return $ text "@*" <> cr
-inlineToTexinfo SoftBreak = do
- wrapText <- gets (writerWrapText . stOptions)
- case wrapText of
- WrapAuto -> return space
- WrapNone -> return space
- WrapPreserve -> return cr
-inlineToTexinfo Space = return space
-
-inlineToTexinfo (Link _ txt (src@('#':_), _)) = do
- contents <- escapeCommas $ inlineListToTexinfo txt
- return $ text "@ref" <>
- braces (text (stringToTexinfo src) <> text "," <> contents)
-inlineToTexinfo (Link _ txt (src, _)) = do
- case txt of
- [Str x] | escapeURI x == src -> -- autolink
- do return $ text $ "@url{" ++ x ++ "}"
- _ -> do contents <- escapeCommas $ inlineListToTexinfo txt
- let src1 = stringToTexinfo src
- return $ text ("@uref{" ++ src1 ++ ",") <> contents <>
- char '}'
-
-inlineToTexinfo (Image attr alternate (source, _)) = do
- content <- escapeCommas $ inlineListToTexinfo alternate
- opts <- gets stOptions
- let showDim dim = case (dimension dim attr) of
- (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in"
- (Just (Percent _)) -> ""
- (Just d) -> show d
- Nothing -> ""
- return $ text ("@image{" ++ base ++ ',':(showDim Width) ++ ',':(showDim Height) ++ ",")
- <> content <> text "," <> text (ext ++ "}")
- where
- ext = drop 1 $ takeExtension source'
- base = dropExtension source'
- source' = if isURI source
- then source
- else unEscapeString source
-
-inlineToTexinfo (Note contents) = do
- contents' <- blockListToTexinfo contents
- return $ text "@footnote" <> braces contents'
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
deleted file mode 100644
index 45f1780cf..000000000
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ /dev/null
@@ -1,486 +0,0 @@
-{-
-Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.Textile
- Copyright : Copyright (C) 2010-2015 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Conversion of 'Pandoc' documents to Textile markup.
-
-Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
--}
-module Text.Pandoc.Writers.Textile ( writeTextile ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared
-import Text.Pandoc.Pretty (render)
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Writers.Shared
-import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intercalate )
-import Control.Monad.State
-import Data.Char ( isSpace )
-import Text.Pandoc.Class ( PandocMonad )
-
-data WriterState = WriterState {
- stNotes :: [String] -- Footnotes
- , stListLevel :: [Char] -- String at beginning of list items, e.g. "**"
- , stStartNum :: Maybe Int -- Start number if first list item
- , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
- }
-
--- | Convert Pandoc to Textile.
-writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeTextile opts document = return $
- evalState (pandocToTextile opts document)
- WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing,
- stUseTags = False }
-
--- | Return Textile representation of document.
-pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
-pandocToTextile opts (Pandoc meta blocks) = do
- metadata <- metaToJSON opts (blockListToTextile opts)
- (inlineListToTextile opts) meta
- body <- blockListToTextile opts blocks
- notes <- liftM (unlines . reverse . stNotes) get
- let main = body ++ if null notes then "" else ("\n\n" ++ notes)
- let context = defField "body" main metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
-
-withUseTags :: State WriterState a -> State WriterState a
-withUseTags action = do
- oldUseTags <- liftM stUseTags get
- modify $ \s -> s { stUseTags = True }
- result <- action
- modify $ \s -> s { stUseTags = oldUseTags }
- return result
-
--- | Escape one character as needed for Textile.
-escapeCharForTextile :: Char -> String
-escapeCharForTextile x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- '*' -> "&#42;"
- '_' -> "&#95;"
- '@' -> "&#64;"
- '+' -> "&#43;"
- '-' -> "&#45;"
- '|' -> "&#124;"
- '\x2014' -> " -- "
- '\x2013' -> " - "
- '\x2019' -> "'"
- '\x2026' -> "..."
- c -> [c]
-
--- | Escape string as needed for Textile.
-escapeStringForTextile :: String -> String
-escapeStringForTextile = concatMap escapeCharForTextile
-
--- | Convert Pandoc block element to Textile.
-blockToTextile :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState String
-
-blockToTextile _ Null = return ""
-
-blockToTextile opts (Div attr bs) = do
- let startTag = render Nothing $ tagWithAttrs "div" attr
- let endTag = "</div>"
- contents <- blockListToTextile opts bs
- return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n"
-
-blockToTextile opts (Plain inlines) =
- inlineListToTextile opts inlines
-
--- title beginning with fig: indicates that the image is a figure
-blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- blockToTextile opts (Para txt)
- im <- inlineToTextile opts (Image attr txt (src,tit))
- return $ im ++ "\n" ++ capt
-
-blockToTextile opts (Para inlines) = do
- useTags <- liftM stUseTags get
- listLevel <- liftM stListLevel get
- contents <- inlineListToTextile opts inlines
- return $ if useTags
- then "<p>" ++ contents ++ "</p>"
- else contents ++ if null listLevel then "\n" else ""
-
-blockToTextile opts (LineBlock lns) =
- blockToTextile opts $ linesToPara lns
-
-blockToTextile _ (RawBlock f str)
- | f == Format "html" || f == Format "textile" = return str
- | otherwise = return ""
-
-blockToTextile _ HorizontalRule = return "<hr />\n"
-
-blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do
- contents <- inlineListToTextile opts inlines
- let identAttr = if null ident then "" else ('#':ident)
- let attribs = if null identAttr && null classes
- then ""
- else "(" ++ unwords classes ++ identAttr ++ ")"
- let lang = maybe "" (\x -> "[" ++ x ++ "]") $ lookup "lang" keyvals
- let styles = maybe "" (\x -> "{" ++ x ++ "}") $ lookup "style" keyvals
- let prefix = 'h' : show level ++ attribs ++ styles ++ lang ++ ". "
- return $ prefix ++ contents ++ "\n"
-
-blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) =
- return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++
- "\n</pre>\n"
- where classes' = if null classes
- then ""
- else " class=\"" ++ unwords classes ++ "\""
-
-blockToTextile _ (CodeBlock (_,classes,_) str) =
- return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n"
- where classes' = if null classes
- then ""
- else "(" ++ unwords classes ++ ")"
-
-blockToTextile opts (BlockQuote bs@[Para _]) = do
- contents <- blockListToTextile opts bs
- return $ "bq. " ++ contents ++ "\n\n"
-
-blockToTextile opts (BlockQuote blocks) = do
- contents <- blockListToTextile opts blocks
- return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n"
-
-blockToTextile opts (Table [] aligns widths headers rows') |
- all (==0) widths = do
- hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers
- let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|"
- let header = if all null headers then "" else cellsToRow hs ++ "\n"
- let blocksToCell (align, bs) = do
- contents <- stripTrailingNewlines <$> blockListToTextile opts bs
- let alignMarker = case align of
- AlignLeft -> "<. "
- AlignRight -> ">. "
- AlignCenter -> "=. "
- AlignDefault -> ""
- return $ alignMarker ++ contents
- let rowToCells = mapM blocksToCell . zip aligns
- bs <- mapM rowToCells rows'
- let body = unlines $ map cellsToRow bs
- return $ header ++ body
-
-blockToTextile opts (Table capt aligns widths headers rows') = do
- let alignStrings = map alignmentToString aligns
- captionDoc <- if null capt
- then return ""
- else do
- c <- inlineListToTextile opts capt
- return $ "<caption>" ++ c ++ "</caption>\n"
- let percent w = show (truncate (100*w) :: Integer) ++ "%"
- let coltags = if all (== 0.0) widths
- then ""
- else unlines $ map
- (\w -> "<col width=\"" ++ percent w ++ "\" />") widths
- head' <- if all null headers
- then return ""
- else do
- hs <- tableRowToTextile opts alignStrings 0 headers
- return $ "<thead>\n" ++ hs ++ "\n</thead>\n"
- body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows'
- return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++
- "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n"
-
-blockToTextile opts x@(BulletList items) = do
- oldUseTags <- liftM stUseTags get
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
- then do
- contents <- withUseTags $ mapM (listItemToTextile opts) items
- return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n"
- else do
- modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
- level <- get >>= return . length . stListLevel
- contents <- mapM (listItemToTextile opts) items
- modify $ \s -> s { stListLevel = init (stListLevel s) }
- return $ vcat contents ++ (if level > 1 then "" else "\n")
-
-blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do
- oldUseTags <- liftM stUseTags get
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
- then do
- contents <- withUseTags $ mapM (listItemToTextile opts) items
- return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++
- "\n</ol>\n"
- else do
- modify $ \s -> s { stListLevel = stListLevel s ++ "#"
- , stStartNum = if start > 1
- then Just start
- else Nothing }
- level <- get >>= return . length . stListLevel
- contents <- mapM (listItemToTextile opts) items
- modify $ \s -> s { stListLevel = init (stListLevel s),
- stStartNum = Nothing }
- return $ vcat contents ++ (if level > 1 then "" else "\n")
-
-blockToTextile opts (DefinitionList items) = do
- contents <- withUseTags $ mapM (definitionListItemToTextile opts) items
- return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n"
-
--- Auxiliary functions for lists:
-
--- | Convert ordered list attributes to HTML attribute string
-listAttribsToString :: ListAttributes -> String
-listAttribsToString (startnum, numstyle, _) =
- let numstyle' = camelCaseToHyphenated $ show numstyle
- in (if startnum /= 1
- then " start=\"" ++ show startnum ++ "\""
- else "") ++
- (if numstyle /= DefaultStyle
- then " style=\"list-style-type: " ++ numstyle' ++ ";\""
- else "")
-
--- | Convert bullet or ordered list item (list of blocks) to Textile.
-listItemToTextile :: WriterOptions -> [Block] -> State WriterState String
-listItemToTextile opts items = do
- contents <- blockListToTextile opts items
- useTags <- get >>= return . stUseTags
- if useTags
- then return $ "<li>" ++ contents ++ "</li>"
- else do
- marker <- gets stListLevel
- mbstart <- gets stStartNum
- case mbstart of
- Just n -> do
- modify $ \s -> s{ stStartNum = Nothing }
- return $ marker ++ show n ++ " " ++ contents
- Nothing -> return $ marker ++ " " ++ contents
-
--- | Convert definition list item (label, list of blocks) to Textile.
-definitionListItemToTextile :: WriterOptions
- -> ([Inline],[[Block]])
- -> State WriterState String
-definitionListItemToTextile opts (label, items) = do
- labelText <- inlineListToTextile opts label
- contents <- mapM (blockListToTextile opts) items
- return $ "<dt>" ++ labelText ++ "</dt>\n" ++
- (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents)
-
--- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
-isSimpleList :: Block -> Bool
-isSimpleList x =
- case x of
- BulletList items -> all isSimpleListItem items
- OrderedList (_, sty, _) items -> all isSimpleListItem items &&
- sty `elem` [DefaultStyle, Decimal]
- _ -> False
-
--- | True if list item can be handled with the simple wiki syntax. False if
--- HTML tags will be needed.
-isSimpleListItem :: [Block] -> Bool
-isSimpleListItem [] = True
-isSimpleListItem [x] =
- case x of
- Plain _ -> True
- Para _ -> True
- BulletList _ -> isSimpleList x
- OrderedList _ _ -> isSimpleList x
- _ -> False
-isSimpleListItem [x, y] | isPlainOrPara x =
- case y of
- BulletList _ -> isSimpleList y
- OrderedList _ _ -> isSimpleList y
- _ -> False
-isSimpleListItem _ = False
-
-isPlainOrPara :: Block -> Bool
-isPlainOrPara (Plain _) = True
-isPlainOrPara (Para _) = True
-isPlainOrPara _ = False
-
--- | Concatenates strings with line breaks between them.
-vcat :: [String] -> String
-vcat = intercalate "\n"
-
--- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki,
--- and Textile writers, and should be abstracted out.)
-
-tableRowToTextile :: WriterOptions
- -> [String]
- -> Int
- -> [[Block]]
- -> State WriterState String
-tableRowToTextile opts alignStrings rownum cols' = do
- let celltype = if rownum == 0 then "th" else "td"
- let rowclass = case rownum of
- 0 -> "header"
- x | x `rem` 2 == 1 -> "odd"
- _ -> "even"
- cols'' <- sequence $ zipWith
- (\alignment item -> tableItemToTextile opts celltype alignment item)
- alignStrings cols'
- return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
-
-alignmentToString :: Alignment -> [Char]
-alignmentToString alignment = case alignment of
- AlignLeft -> "left"
- AlignRight -> "right"
- AlignCenter -> "center"
- AlignDefault -> "left"
-
-tableItemToTextile :: WriterOptions
- -> String
- -> String
- -> [Block]
- -> State WriterState String
-tableItemToTextile opts celltype align' item = do
- let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++
- x ++ "</" ++ celltype ++ ">"
- contents <- blockListToTextile opts item
- return $ mkcell contents
-
--- | Convert list of Pandoc block elements to Textile.
-blockListToTextile :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState String
-blockListToTextile opts blocks =
- mapM (blockToTextile opts) blocks >>= return . vcat
-
--- | Convert list of Pandoc inline elements to Textile.
-inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String
-inlineListToTextile opts lst =
- mapM (inlineToTextile opts) lst >>= return . concat
-
--- | Convert Pandoc inline element to Textile.
-inlineToTextile :: WriterOptions -> Inline -> State WriterState String
-
-inlineToTextile opts (Span _ lst) =
- inlineListToTextile opts lst
-
-inlineToTextile opts (Emph lst) = do
- contents <- inlineListToTextile opts lst
- return $ if '_' `elem` contents
- then "<em>" ++ contents ++ "</em>"
- else "_" ++ contents ++ "_"
-
-inlineToTextile opts (Strong lst) = do
- contents <- inlineListToTextile opts lst
- return $ if '*' `elem` contents
- then "<strong>" ++ contents ++ "</strong>"
- else "*" ++ contents ++ "*"
-
-inlineToTextile opts (Strikeout lst) = do
- contents <- inlineListToTextile opts lst
- return $ if '-' `elem` contents
- then "<del>" ++ contents ++ "</del>"
- else "-" ++ contents ++ "-"
-
-inlineToTextile opts (Superscript lst) = do
- contents <- inlineListToTextile opts lst
- return $ if '^' `elem` contents
- then "<sup>" ++ contents ++ "</sup>"
- else "[^" ++ contents ++ "^]"
-
-inlineToTextile opts (Subscript lst) = do
- contents <- inlineListToTextile opts lst
- return $ if '~' `elem` contents
- then "<sub>" ++ contents ++ "</sub>"
- else "[~" ++ contents ++ "~]"
-
-inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst
-
-inlineToTextile opts (Quoted SingleQuote lst) = do
- contents <- inlineListToTextile opts lst
- return $ "'" ++ contents ++ "'"
-
-inlineToTextile opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToTextile opts lst
- return $ "\"" ++ contents ++ "\""
-
-inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst
-
-inlineToTextile _ (Code _ str) =
- return $ if '@' `elem` str
- then "<tt>" ++ escapeStringForXML str ++ "</tt>"
- else "@" ++ str ++ "@"
-
-inlineToTextile _ (Str str) = return $ escapeStringForTextile str
-
-inlineToTextile _ (Math _ str) =
- return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>"
-
-inlineToTextile opts (RawInline f str)
- | f == Format "html" || f == Format "textile" = return str
- | (f == Format "latex" || f == Format "tex") &&
- isEnabled Ext_raw_tex opts = return str
- | otherwise = return ""
-
-inlineToTextile _ LineBreak = return "\n"
-
-inlineToTextile _ SoftBreak = return " "
-
-inlineToTextile _ Space = return " "
-
-inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do
- let classes = if null cls
- then ""
- else "(" ++ unwords cls ++ ")"
- label <- case txt of
- [Code _ s]
- | s == src -> return "$"
- [Str s]
- | s == src -> return "$"
- _ -> inlineListToTextile opts txt
- return $ "\"" ++ classes ++ label ++ "\":" ++ src
-
-inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do
- alt' <- inlineListToTextile opts alt
- let txt = if null tit
- then if null alt'
- then ""
- else "(" ++ alt' ++ ")"
- else "(" ++ tit ++ ")"
- classes = if null cls
- then ""
- else "(" ++ unwords cls ++ ")"
- showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";"
- in case (dimension dir attr) of
- Just (Percent a) -> toCss $ show (Percent a)
- Just dim -> toCss $ showInPixel opts dim ++ "px"
- Nothing -> Nothing
- styles = case (showDim Width, showDim Height) of
- (Just w, Just h) -> "{" ++ w ++ h ++ "}"
- (Just w, Nothing) -> "{" ++ w ++ "height:auto;}"
- (Nothing, Just h) -> "{" ++ "width:auto;" ++ h ++ "}"
- (Nothing, Nothing) -> ""
- return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!"
-
-inlineToTextile opts (Note contents) = do
- curNotes <- liftM stNotes get
- let newnum = length curNotes + 1
- contents' <- blockListToTextile opts contents
- let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n"
- modify $ \s -> s { stNotes = thisnote : curNotes }
- return $ "[" ++ show newnum ++ "]"
- -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
deleted file mode 100644
index d01ce0e8b..000000000
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ /dev/null
@@ -1,396 +0,0 @@
-{-
-Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.ZimWiki
- Copyright : Copyright (C) 2008-2015 John MacFarlane, 2017 Alex Ivkin
- License : GNU GPL, version 2 or above
-
- Maintainer : Alex Ivkin <alex@ivkin.net>
- Stability : beta
- Portability : portable
-
-Conversion of 'Pandoc' documents to ZimWiki markup.
-
-http://zim-wiki.org/manual/Help/Wiki_Syntax.html
--}
-
-module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where
-import Text.Pandoc.Definition
-import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) )
-import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr
- , substitute )
-import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
-import Text.Pandoc.ImageSize
-import Text.Pandoc.Templates ( renderTemplate' )
-import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf )
-import Data.Text ( breakOnAll, pack )
-import Data.Default (Default(..))
-import Network.URI ( isURI )
-import Control.Monad ( zipWithM )
-import Control.Monad.State ( modify, State, get, evalState )
-import Text.Pandoc.Class ( PandocMonad )
-import qualified Data.Map as Map
-
-data WriterState = WriterState {
- stItemNum :: Int,
- stIndent :: String, -- Indent after the marker at the beginning of list items
- stInTable :: Bool, -- Inside a table
- stInLink :: Bool -- Inside a link description
- }
-
-instance Default WriterState where
- def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False }
-
--- | Convert Pandoc to ZimWiki.
-writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
-writeZimWiki opts document = return $ evalState (pandocToZimWiki opts document) def
-
--- | Return ZimWiki representation of document.
-pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String
-pandocToZimWiki opts (Pandoc meta blocks) = do
- metadata <- metaToJSON opts
- (fmap trimr . blockListToZimWiki opts)
- (inlineListToZimWiki opts)
- meta
- body <- blockListToZimWiki opts blocks
- --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n"
- let main = body
- let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ metadata
- case writerTemplate opts of
- Just tpl -> return $ renderTemplate' tpl context
- Nothing -> return main
-
--- | Escape special characters for ZimWiki.
-escapeString :: String -> String
-escapeString = substitute "__" "''__''" .
- substitute "**" "''**''" .
- substitute "~~" "''~~''" .
- substitute "//" "''//''"
-
--- | Convert Pandoc block element to ZimWiki.
-blockToZimWiki :: WriterOptions -> Block -> State WriterState String
-
-blockToZimWiki _ Null = return ""
-
-blockToZimWiki opts (Div _attrs bs) = do
- contents <- blockListToZimWiki opts bs
- return $ contents ++ "\n"
-
-blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines
-
--- title beginning with fig: indicates that the image is a figure
--- ZimWiki doesn't support captions - so combine together alt and caption into alt
-blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
- capt <- if null txt
- then return ""
- else (" " ++) `fmap` inlineListToZimWiki opts txt
- let opt = if null txt
- then ""
- else "|" ++ if null tit then capt else tit ++ capt
- -- Relative links fail isURI and receive a colon
- prefix = if isURI src then "" else ":"
- return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n"
-
-blockToZimWiki opts (Para inlines) = do
- indent <- stIndent <$> get
- -- useTags <- stUseTags <$> get
- contents <- inlineListToZimWiki opts inlines
- return $ contents ++ if null indent then "\n" else ""
-
-blockToZimWiki opts (LineBlock lns) = do
- blockToZimWiki opts $ linesToPara lns
-
-blockToZimWiki opts (RawBlock f str)
- | f == Format "zimwiki" = return str
- | f == Format "html" = do cont <- indentFromHTML opts str; return cont
- | otherwise = return ""
-
-blockToZimWiki _ HorizontalRule = return "\n----\n"
-
-blockToZimWiki opts (Header level _ inlines) = do
- contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers
- let eqs = replicate ( 7 - level ) '='
- return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
-
-blockToZimWiki _ (CodeBlock (_,classes,_) str) = do
- -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using
- let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")]
- let langmap = Map.fromList langal
- return $ case classes of
- [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block
- (x:_) -> "{{{code: lang=\"" ++
- (case Map.lookup x langmap of
- Nothing -> x
- Just y -> y) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec
-
-blockToZimWiki opts (BlockQuote blocks) = do
- contents <- blockListToZimWiki opts blocks
- return $ unlines $ map ("> " ++) $ lines contents
-
-blockToZimWiki opts (Table capt aligns _ headers rows) = do
- captionDoc <- if null capt
- then return ""
- else do
- c <- inlineListToZimWiki opts capt
- return $ "" ++ c ++ "\n"
- headers' <- if all null headers
- then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0)
- else mapM (inlineListToZimWiki opts) (map removeFormatting headers) -- emphasis, links etc. are not allowed in table headers
- rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
- let widths = map (maximum . map length) $ transpose (headers':rows')
- let padTo (width, al) s =
- case (width - length s) of
- x | x > 0 ->
- if al == AlignLeft || al == AlignDefault
- then s ++ replicate x ' '
- else if al == AlignRight
- then replicate x ' ' ++ s
- else replicate (x `div` 2) ' ' ++
- s ++ replicate (x - x `div` 2) ' '
- | otherwise -> s
- let borderCell (width, al) _ =
- if al == AlignLeft
- then ":"++ replicate (width-1) '-'
- else if al == AlignDefault
- then replicate width '-'
- else if al == AlignRight
- then replicate (width-1) '-' ++ ":"
- else ":" ++ replicate (width-2) '-' ++ ":"
- let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|"
- let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|"
- return $ captionDoc ++
- (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++
- unlines (map renderRow rows')
-
-blockToZimWiki opts (BulletList items) = do
- indent <- stIndent <$> get
- modify $ \s -> s { stIndent = stIndent s ++ "\t" }
- contents <- (mapM (listItemToZimWiki opts) items)
- modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
- return $ vcat contents ++ if null indent then "\n" else ""
-
-blockToZimWiki opts (OrderedList _ items) = do
- indent <- stIndent <$> get
- modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 }
- contents <- (mapM (orderedListItemToZimWiki opts) items)
- modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
- return $ vcat contents ++ if null indent then "\n" else ""
-
-blockToZimWiki opts (DefinitionList items) = do
- contents <- (mapM (definitionListItemToZimWiki opts) items)
- return $ vcat contents
-
-definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String
-definitionListItemToZimWiki opts (label, items) = do
- labelText <- inlineListToZimWiki opts label
- contents <- mapM (blockListToZimWiki opts) items
- indent <- stIndent <$> get
- return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents
-
--- Auxiliary functions for lists:
-indentFromHTML :: WriterOptions -> String -> State WriterState String
-indentFromHTML _ str = do
- indent <- stIndent <$> get
- itemnum <- stItemNum <$> get
- if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "."
- else if isInfixOf "</li>" str then return "\n"
- else if isInfixOf "<li value=" str then do
- -- poor man's cut
- let val = drop 10 $ reverse $ drop 1 $ reverse str
- --let val = take ((length valls) - 2) valls
- modify $ \s -> s { stItemNum = read val }
- return ""
- else if isInfixOf "<ol>" str then do
- let olcount=countSubStrs "<ol>" str
- modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 }
- return ""
- else if isInfixOf "</ol>" str then do
- let olcount=countSubStrs "/<ol>" str
- modify $ \s -> s{ stIndent = drop olcount (stIndent s) }
- return ""
- else
- return ""
-
-countSubStrs :: String -> String -> Int
-countSubStrs sub str = length $ breakOnAll (pack sub) (pack str)
-
-cleanupCode :: String -> String
-cleanupCode = substitute "<nowiki>" "" . substitute "</nowiki>" ""
-
-vcat :: [String] -> String
-vcat = intercalate "\n"
-
--- | Convert bullet list item (list of blocks) to ZimWiki.
-listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
-listItemToZimWiki opts items = do
- contents <- blockListToZimWiki opts items
- indent <- stIndent <$> get
- return $ indent ++ "* " ++ contents
-
--- | Convert ordered list item (list of blocks) to ZimWiki.
-orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
-orderedListItemToZimWiki opts items = do
- contents <- blockListToZimWiki opts items
- indent <- stIndent <$> get
- itemnum <- stItemNum <$> get
- --modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering
- return $ indent ++ show itemnum ++ ". " ++ contents
-
--- Auxiliary functions for tables:
-tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String
-tableItemToZimWiki opts align' item = do
- let mkcell x = (if align' == AlignRight || align' == AlignCenter
- then " "
- else "") ++ x ++
- (if align' == AlignLeft || align' == AlignCenter
- then " "
- else "")
- modify $ \s -> s { stInTable = True }
- contents <- blockListToZimWiki opts item
- modify $ \s -> s { stInTable = False }
- return $ mkcell contents
-
--- | Convert list of Pandoc block elements to ZimWiki.
-blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String
-blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks
-
--- | Convert list of Pandoc inline elements to ZimWiki.
-inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String
-inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst)
-
--- | Convert Pandoc inline element to ZimWiki.
-inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String
-
-inlineToZimWiki opts (Emph lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "//" ++ contents ++ "//"
-
-inlineToZimWiki opts (Strong lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "**" ++ contents ++ "**"
-
-inlineToZimWiki opts (Strikeout lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "~~" ++ contents ++ "~~"
-
-inlineToZimWiki opts (Superscript lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "^{" ++ contents ++ "}"
-
-inlineToZimWiki opts (Subscript lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "_{" ++ contents ++ "}"
-
-inlineToZimWiki opts (Quoted SingleQuote lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "\8216" ++ contents ++ "\8217"
-
-inlineToZimWiki opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToZimWiki opts lst
- return $ "\8220" ++ contents ++ "\8221"
-
-inlineToZimWiki opts (Span _attrs ils) = inlineListToZimWiki opts ils
-
-inlineToZimWiki opts (SmallCaps lst) = inlineListToZimWiki opts lst
-
-inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst
-
-inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''"
-
-inlineToZimWiki _ (Str str) = do
- inTable <- stInTable <$> get
- inLink <- stInLink <$> get
- if inTable
- then return $ substitute "|" "\\|" . escapeString $ str
- else
- if inLink
- then return $ str
- else return $ escapeString str
-
-inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped
- where delim = case mathType of
- DisplayMath -> "$$"
- InlineMath -> "$"
-
--- | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
-inlineToZimWiki opts (RawInline f str)
- | f == Format "zimwiki" = return str
- | f == Format "html" = do cont <- indentFromHTML opts str; return cont
- | otherwise = return ""
-
-inlineToZimWiki _ LineBreak = do
- inTable <- stInTable <$> get
- if inTable
- then return "\\n"
- else return "\n"
-
-inlineToZimWiki opts SoftBreak =
- case writerWrapText opts of
- WrapNone -> return " "
- WrapAuto -> return " "
- WrapPreserve -> return "\n"
-
-inlineToZimWiki _ Space = return " "
-
-inlineToZimWiki opts (Link _ txt (src, _)) = do
- inTable <- stInTable <$> get
- modify $ \s -> s { stInLink = True }
- label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it
- modify $ \s -> s { stInLink = False }
- let label'= if inTable
- then "" -- no label is allowed in a table
- else "|"++label
- case txt of
- [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
- | escapeURI s == src -> return src
- _ -> if isURI src
- then return $ "[[" ++ src ++ label' ++ "]]"
- else return $ "[[" ++ src' ++ label' ++ "]]"
- where src' = case src of
- '/':xs -> xs -- with leading / it's a
- _ -> src -- link to a help page
-inlineToZimWiki opts (Image attr alt (source, tit)) = do
- alt' <- inlineListToZimWiki opts alt
- inTable <- stInTable <$> get
- let txt = case (tit, alt, inTable) of
- ("",[], _) -> ""
- ("", _, False ) -> "|" ++ alt'
- (_ , _, False ) -> "|" ++ tit
- (_ , _, True ) -> ""
- -- Relative links fail isURI and receive a colon
- prefix = if isURI source then "" else ":"
- return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}"
-
-inlineToZimWiki opts (Note contents) = do
- -- no concept of notes in zim wiki, use a text block
- contents' <- blockListToZimWiki opts contents
- return $ " **{Note:** " ++ trimr contents' ++ "**}**"
-
-imageDims :: WriterOptions -> Attr -> String
-imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
- where
- toPx = fmap (showInPixel opts) . checkPct
- checkPct (Just (Percent _)) = Nothing
- checkPct maybeDim = maybeDim
- go (Just w) Nothing = "?" ++ w
- go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
- go Nothing (Just h) = "?0x" ++ h
- go Nothing Nothing = ""
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
deleted file mode 100644
index e105aee91..000000000
--- a/src/Text/Pandoc/XML.hs
+++ /dev/null
@@ -1,115 +0,0 @@
-{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.XML
- Copyright : Copyright (C) 2006-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for escaping and formatting XML.
--}
-module Text.Pandoc.XML ( escapeCharForXML,
- escapeStringForXML,
- inTags,
- selfClosingTag,
- inTagsSimple,
- inTagsIndented,
- toEntities,
- fromEntities ) where
-
-import Text.Pandoc.Pretty
-import Data.Char (ord, isAscii, isSpace)
-import Text.HTML.TagSoup.Entity (lookupEntity)
-
--- | Escape one character as needed for XML.
-escapeCharForXML :: Char -> String
-escapeCharForXML x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- c -> [c]
-
--- | Escape string as needed for XML. Entity references are not preserved.
-escapeStringForXML :: String -> String
-escapeStringForXML = concatMap escapeCharForXML
-
--- | Escape newline characters as &#10;
-escapeNls :: String -> String
-escapeNls (x:xs)
- | x == '\n' = "&#10;" ++ escapeNls xs
- | otherwise = x : escapeNls xs
-escapeNls [] = []
-
--- | Return a text object with a string of formatted XML attributes.
-attributeList :: [(String, String)] -> Doc
-attributeList = hcat . map
- (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
- escapeNls (escapeStringForXML b) ++ "\""))
-
--- | Put the supplied contents between start and end tags of tagType,
--- with specified attributes and (if specified) indentation.
-inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
-inTags isIndented tagType attribs contents =
- let openTag = char '<' <> text tagType <> attributeList attribs <>
- char '>'
- closeTag = text "</" <> text tagType <> char '>'
- in if isIndented
- then openTag $$ nest 2 contents $$ closeTag
- else openTag <> contents <> closeTag
-
--- | Return a self-closing tag of tagType with specified attributes
-selfClosingTag :: String -> [(String, String)] -> Doc
-selfClosingTag tagType attribs =
- char '<' <> text tagType <> attributeList attribs <> text " />"
-
--- | Put the supplied contents between start and end tags of tagType.
-inTagsSimple :: String -> Doc -> Doc
-inTagsSimple tagType = inTags False tagType []
-
--- | Put the supplied contents in indented block btw start and end tags.
-inTagsIndented :: String -> Doc -> Doc
-inTagsIndented tagType = inTags True tagType []
-
--- | Escape all non-ascii characters using numerical entities.
-toEntities :: String -> String
-toEntities [] = ""
-toEntities (c:cs)
- | isAscii c = c : toEntities cs
- | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
-
--- Unescapes XML entities
-fromEntities :: String -> String
-fromEntities ('&':xs) =
- case lookupEntity ent' of
- Just c -> c ++ fromEntities rest
- Nothing -> '&' : fromEntities xs
- where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
- (zs,';':ys) -> (zs,ys)
- (zs, ys) -> (zs,ys)
- ent' = case ent of
- '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
-
-fromEntities (x:xs) = x : fromEntities xs
-fromEntities [] = []