aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs1444
-rw-r--r--src/Text/Pandoc/Asciify.hs422
-rw-r--r--src/Text/Pandoc/CSS.hs43
-rw-r--r--src/Text/Pandoc/Class.hs539
-rw-r--r--src/Text/Pandoc/Compat/Time.hs30
-rw-r--r--src/Text/Pandoc/Data.hsb15
-rw-r--r--src/Text/Pandoc/Emoji.hs906
-rw-r--r--src/Text/Pandoc/Error.hs76
-rw-r--r--src/Text/Pandoc/Extensions.hs267
-rw-r--r--src/Text/Pandoc/Highlighting.hs223
-rw-r--r--src/Text/Pandoc/ImageSize.hs547
-rw-r--r--src/Text/Pandoc/Logging.hs232
-rw-r--r--src/Text/Pandoc/MIME.hs527
-rw-r--r--src/Text/Pandoc/MediaBag.hs113
-rw-r--r--src/Text/Pandoc/Options.hs217
-rw-r--r--src/Text/Pandoc/PDF.hs369
-rw-r--r--src/Text/Pandoc/Parsing.hs1329
-rw-r--r--src/Text/Pandoc/Pretty.hs557
-rw-r--r--src/Text/Pandoc/Process.hs98
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs128
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs1055
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs626
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs154
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs229
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs1044
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs108
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs47
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs279
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs1136
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs160
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1437
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs2119
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs677
-rw-r--r--src/Text/Pandoc/Readers/Native.hs71
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs103
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs113
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs253
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs495
-rw-r--r--src/Text/Pandoc/Readers/Odt/Base.hs43
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs929
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs260
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs62
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs48
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs171
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs1063
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs110
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs744
-rw-r--r--src/Text/Pandoc/Readers/Org.hs62
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs137
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs979
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs172
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs880
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs218
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs259
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs217
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs97
-rw-r--r--src/Text/Pandoc/Readers/RST.hs1354
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs525
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs729
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs596
-rw-r--r--src/Text/Pandoc/SelfContained.hs181
-rw-r--r--src/Text/Pandoc/Shared.hs883
-rw-r--r--src/Text/Pandoc/Slides.hs63
-rw-r--r--src/Text/Pandoc/Templates.hs77
-rw-r--r--src/Text/Pandoc/UTF8.hs121
-rw-r--r--src/Text/Pandoc/UUID.hs78
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs470
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs190
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs481
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs322
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs440
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs1302
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs522
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs1257
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs617
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs1069
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs370
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs584
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs1388
-rw-r--r--src/Text/Pandoc/Writers/Man.hs381
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs1147
-rw-r--r--src/Text/Pandoc/Writers/Math.hs49
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs442
-rw-r--r--src/Text/Pandoc/Writers/Native.hs79
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs210
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs103
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs626
-rw-r--r--src/Text/Pandoc/Writers/Org.hs411
-rw-r--r--src/Text/Pandoc/Writers/RST.hs556
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs412
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs183
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs324
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs498
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs486
-rw-r--r--src/Text/Pandoc/Writers/ZimWiki.hs396
-rw-r--r--src/Text/Pandoc/XML.hs115
96 files changed, 44676 insertions, 0 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
new file mode 100644
index 000000000..be8f26811
--- /dev/null
+++ b/src/Text/Pandoc/App.hs
@@ -0,0 +1,1444 @@
+{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-}
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.App
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Does a pandoc conversion based on command-line options.
+-}
+module Text.Pandoc.App (
+ convertWithOpts
+ , Opt(..)
+ , defaultOpts
+ , parseOptions
+ , options
+ ) where
+import Text.Pandoc
+import Text.Pandoc.Builder (setMeta)
+import Text.Pandoc.PDF (makePDF)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8,
+ headerShift, err, openURL, safeRead,
+ readDataFile )
+import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
+import Text.Pandoc.XML ( toEntities )
+import Text.Pandoc.Highlighting (highlightingStyles)
+import Text.Pandoc.SelfContained ( makeSelfContained )
+import Text.Pandoc.Process (pipeProcess)
+import Skylighting ( Style, defaultSyntaxMap, Syntax(..) )
+import Text.Printf
+import System.Environment ( getEnvironment, getProgName, getArgs )
+import Control.Applicative ((<|>))
+import System.Exit ( ExitCode (..), exitSuccess )
+import System.FilePath
+import Data.Char ( toLower, toUpper )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )
+import System.Directory ( getAppUserDataDirectory, findExecutable,
+ doesFileExist, Permissions(..), getPermissions )
+import System.IO ( stdout, stderr )
+import System.IO.Error ( isDoesNotExistError )
+import qualified Control.Exception as E
+import Control.Exception.Extensible ( throwIO )
+import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad
+import Control.Monad.Trans
+import Data.Maybe (fromMaybe, isNothing, isJust)
+import Data.Foldable (foldrM)
+import Network.URI (parseURI, isURI, URI(..))
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString as BS
+import qualified Data.Map as M
+import Data.Aeson (eitherDecode', encode)
+import Data.Yaml (decode)
+import qualified Data.Yaml as Yaml
+import qualified Data.Text as T
+import System.Console.GetOpt
+import Text.Pandoc.Class (withMediaBag, PandocIO, getLog)
+import Paths_pandoc (getDataDir)
+#ifndef _WINDOWS
+import System.Posix.Terminal (queryTerminal)
+import System.Posix.IO (stdOutput)
+#endif
+
+parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
+parseOptions options' defaults = do
+ rawArgs <- map UTF8.decodeArg <$> getArgs
+ prg <- getProgName
+
+ let (actions, args, unrecognizedOpts, errors) =
+ getOpt' Permute options' rawArgs
+
+ let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts
+
+ unless (null errors && null unknownOptionErrors) $
+ err 2 $ concat errors ++ unlines unknownOptionErrors ++
+ ("Try " ++ prg ++ " --help for more information.")
+
+ -- thread option data structure through all supplied option actions
+ opts <- foldl (>>=) (return defaults) actions
+ return (opts{ optInputFiles = args })
+
+convertWithOpts :: Opt -> IO ()
+convertWithOpts opts = do
+ let args = optInputFiles opts
+ let outputFile = optOutputFile opts
+ let filters = optFilters opts
+ let verbosity = optVerbosity opts
+
+ when (optDumpArgs opts) $
+ do UTF8.hPutStrLn stdout outputFile
+ mapM_ (UTF8.hPutStrLn stdout) args
+ exitSuccess
+
+ epubStylesheet <- case optEpubStylesheet opts of
+ Nothing -> return Nothing
+ Just fp -> Just <$> UTF8.readFile fp
+
+ epubMetadata <- case optEpubMetadata opts of
+ Nothing -> return Nothing
+ Just fp -> Just <$> UTF8.readFile fp
+
+ let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css"
+ let mathMethod =
+ case (optKaTeXJS opts, optKaTeXStylesheet opts) of
+ (Nothing, _) -> optHTMLMathMethod opts
+ (Just js, ss) -> KaTeX js (fromMaybe csscdn ss)
+
+
+ -- --bibliography implies -F pandoc-citeproc for backwards compatibility:
+ let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) &&
+ optCiteMethod opts `notElem` [Natbib, Biblatex] &&
+ "pandoc-citeproc" `notElem` map takeBaseName filters
+ let filters' = if needsCiteproc then "pandoc-citeproc" : filters
+ else filters
+
+ let sources = case args of
+ [] -> ["-"]
+ xs | optIgnoreArgs opts -> ["-"]
+ | otherwise -> xs
+
+ datadir <- case optDataDir opts of
+ Nothing -> E.catch
+ (Just <$> getAppUserDataDirectory "pandoc")
+ (\e -> let _ = (e :: E.SomeException)
+ in return Nothing)
+ Just _ -> return $ optDataDir opts
+
+ -- assign reader and writer based on options and filenames
+ let readerName = case optReader opts of
+ Nothing -> defaultReaderName
+ (if any isURI sources
+ then "html"
+ else "markdown") sources
+ Just x -> map toLower x
+
+ let writerName = case optWriter opts of
+ Nothing -> defaultWriterName outputFile
+ Just x -> map toLower x
+ let format = takeWhile (`notElem` ['+','-'])
+ $ takeFileName writerName -- in case path to lua script
+
+ let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+
+ let laTeXOutput = format `elem` ["latex", "beamer"]
+ let conTeXtOutput = format == "context"
+ let html5Output = format == "html5" || format == "html"
+
+ -- disabling the custom writer for now
+ writer <- if ".lua" `isSuffixOf` format
+ -- note: use non-lowercased version writerName
+ then error "custom writers disabled for now"
+ else case getWriter writerName of
+ Left e -> err 9 $
+ if format == "pdf"
+ then e ++
+ "\nTo create a pdf with pandoc, use " ++
+ "the latex or beamer writer and specify\n" ++
+ "an output file with .pdf extension " ++
+ "(pandoc -t latex -o filename.pdf)."
+ else e
+ Right w -> return (w :: Writer PandocIO)
+
+ -- TODO: we have to get the input and the output into the state for
+ -- the sake of the text2tags reader.
+ reader <- case getReader readerName of
+ Right r -> return (r :: Reader PandocIO)
+ Left e -> err 7 e'
+ where e' = case readerName of
+ "pdf" -> e ++
+ "\nPandoc can convert to PDF, but not from PDF."
+ "doc" -> e ++
+ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc."
+ _ -> e
+
+ let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
+
+ templ <- case optTemplate opts of
+ _ | not standalone -> return Nothing
+ Nothing -> do
+ deftemp <- getDefaultTemplate datadir format
+ case deftemp of
+ Left e -> throwIO e
+ Right t -> return (Just t)
+ Just tp -> do
+ -- strip off extensions
+ let tp' = case takeExtension tp of
+ "" -> tp <.> format
+ _ -> tp
+ Just <$> E.catch (UTF8.readFile tp')
+ (\e -> if isDoesNotExistError e
+ then E.catch
+ (readDataFileUTF8 datadir
+ ("templates" </> tp'))
+ (\e' -> let _ = (e' :: E.SomeException)
+ in throwIO e')
+ else throwIO e)
+
+ let addStringAsVariable varname s vars = return $ (varname, s) : vars
+
+ let addContentsAsVariable varname fp vars = do
+ s <- UTF8.readFile fp
+ return $ (varname, s) : vars
+
+ -- note: this reverses the list constructed in option parsing,
+ -- which in turn was reversed from the command-line order,
+ -- so we end up with the correct order in the variable list:
+ let withList _ [] vars = return vars
+ withList f (x:xs) vars = f x vars >>= withList f xs
+
+ variables <- return (optVariables opts)
+ >>=
+ withList (addContentsAsVariable "include-before")
+ (optIncludeBeforeBody opts)
+ >>=
+ withList (addContentsAsVariable "include-after")
+ (optIncludeAfterBody opts)
+ >>=
+ withList (addContentsAsVariable "header-includes")
+ (optIncludeInHeader opts)
+ >>=
+ withList (addStringAsVariable "css") (optCss opts)
+ >>=
+ maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts)
+ >>=
+ maybe return (addStringAsVariable "epub-cover-image")
+ (optEpubCoverImage opts)
+ >>=
+ (\vars -> case mathMethod of
+ LaTeXMathML Nothing -> do
+ s <- readDataFileUTF8 datadir "LaTeXMathML.js"
+ return $ ("mathml-script", s) : vars
+ _ -> return vars)
+ >>=
+ (\vars -> if format == "dzslides"
+ then do
+ dztempl <- readDataFileUTF8 datadir
+ ("dzslides" </> "template.html")
+ let dzline = "<!-- {{{{ dzslides core"
+ let dzcore = unlines
+ $ dropWhile (not . (dzline `isPrefixOf`))
+ $ lines dztempl
+ return $ ("dzslides-core", dzcore) : vars
+ else return vars)
+
+ let sourceURL = case sources of
+ [] -> Nothing
+ (x:_) -> case parseURI x of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
+
+ let readerOpts = def{ readerStandalone = standalone
+ , readerColumns = optColumns opts
+ , readerTabStop = optTabStop opts
+ , readerIndentedCodeClasses = optIndentedCodeClasses opts
+ , readerApplyMacros = not laTeXOutput
+ , readerDefaultImageExtension =
+ optDefaultImageExtension opts
+ , readerTrackChanges = optTrackChanges opts
+ }
+
+ highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
+
+ let writerOptions = def { writerTemplate = templ,
+ writerVariables = variables,
+ writerTabStop = optTabStop opts,
+ writerTableOfContents = optTableOfContents opts,
+ writerHTMLMathMethod = mathMethod,
+ writerIncremental = optIncremental opts,
+ writerCiteMethod = optCiteMethod opts,
+ writerNumberSections = optNumberSections opts,
+ writerNumberOffset = optNumberOffset opts,
+ writerSectionDivs = optSectionDivs opts,
+ writerReferenceLinks = optReferenceLinks opts,
+ writerReferenceLocation = optReferenceLocation opts,
+ writerDpi = optDpi opts,
+ writerWrapText = optWrapText opts,
+ writerColumns = optColumns opts,
+ writerEmailObfuscation = optEmailObfuscation opts,
+ writerIdentifierPrefix = optIdentifierPrefix opts,
+ writerSourceURL = sourceURL,
+ writerUserDataDir = datadir,
+ writerHtmlQTags = optHtmlQTags opts,
+ writerTopLevelDivision = optTopLevelDivision opts,
+ writerListings = optListings opts,
+ writerSlideLevel = optSlideLevel opts,
+ writerHighlightStyle = highlightStyle,
+ writerSetextHeaders = optSetextHeaders opts,
+ writerEpubMetadata = epubMetadata,
+ writerEpubStylesheet = epubStylesheet,
+ writerEpubFonts = optEpubFonts opts,
+ writerEpubChapterLevel = optEpubChapterLevel opts,
+ writerTOCDepth = optTOCDepth opts,
+ writerReferenceDoc = optReferenceDoc opts,
+ writerLaTeXArgs = optLaTeXEngineArgs opts
+ }
+
+
+#ifdef _WINDOWS
+ let istty = True
+#else
+ istty <- queryTerminal stdOutput
+#endif
+ when (istty && not (isTextFormat format) && outputFile == "-") $
+ err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option."
+
+
+ let transforms = case optBaseHeaderLevel opts of
+ x | x > 1 -> [headerShift (x - 1)]
+ | otherwise -> []
+
+ let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
+ then 0
+ else optTabStop opts)
+
+ readSources :: (Functor m, MonadIO m) => [FilePath] -> m String
+ readSources srcs = convertTabs . intercalate "\n" <$>
+ mapM readSource srcs
+
+ let runIO' :: PandocIO a -> IO a
+ runIO' f = do
+ (res, reports) <- runIOorExplode $ do
+ setVerbosity verbosity
+ x <- f
+ rs <- getLog
+ return (x, rs)
+ case optLogFile opts of
+ Nothing -> return ()
+ Just logfile -> B.writeFile logfile (encodeLogMessages reports)
+ let isWarning msg = messageVerbosity msg == WARNING
+ when (optFailIfWarnings opts && any isWarning reports) $
+ err 3 "Failing because there were warnings."
+ return res
+
+ let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag)
+ sourceToDoc sources' =
+ case reader of
+ StringReader r
+ | optFileScope opts || readerName == "json" -> do
+ pairs <- mapM
+ (readSource >=> withMediaBag . r readerOpts) sources
+ return (mconcat (map fst pairs), mconcat (map snd pairs))
+ | otherwise ->
+ readSources sources' >>= withMediaBag . r readerOpts
+ ByteStringReader r -> do
+ pairs <- mapM (readFile' >=>
+ withMediaBag . r readerOpts) sources
+ return (mconcat (map fst pairs), mconcat (map snd pairs))
+
+ runIO' $ do
+ (doc, media) <- sourceToDoc sources
+ doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=>
+ return . flip (foldr addMetadata) (optMetadata opts) >=>
+ applyTransforms transforms >=>
+ applyFilters datadir filters' [format]) doc
+
+ case writer of
+ -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile
+ ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile
+ StringWriter f
+ | pdfOutput -> do
+ -- make sure writer is latex or beamer or context or html5
+ unless (laTeXOutput || conTeXtOutput || html5Output) $
+ err 47 $ "cannot produce pdf output with " ++ format ++
+ " writer"
+
+ let pdfprog = case () of
+ _ | conTeXtOutput -> "context"
+ _ | html5Output -> "wkhtmltopdf"
+ _ -> optLaTeXEngine opts
+ -- check for pdf creating program
+ mbPdfProg <- liftIO $ findExecutable pdfprog
+ when (isNothing mbPdfProg) $
+ err 41 $ pdfprog ++ " not found. " ++
+ pdfprog ++ " is needed for pdf output."
+
+ res <- makePDF pdfprog f writerOptions verbosity media doc'
+ case res of
+ Right pdf -> writeFnBinary outputFile pdf
+ Left err' -> liftIO $ do
+ B.hPutStr stderr err'
+ B.hPut stderr $ B.pack [10]
+ err 43 "Error producing PDF"
+ | otherwise -> do
+ let htmlFormat = format `elem`
+ ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
+ selfcontain = if optSelfContained opts && htmlFormat
+ then makeSelfContained writerOptions media
+ else return
+ handleEntities = if htmlFormat && optAscii opts
+ then toEntities
+ else id
+ output <- f writerOptions doc'
+ selfcontain (output ++ ['\n' | not standalone]) >>=
+ writerFn outputFile . handleEntities
+
+type Transform = Pandoc -> Pandoc
+
+isTextFormat :: String -> Bool
+isTextFormat s = s `notElem` ["odt","docx","epub","epub3"]
+
+externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc
+externalFilter f args' d = liftIO $ do
+ exists <- doesFileExist f
+ isExecutable <- if exists
+ then executable <$> getPermissions f
+ else return True
+ let (f', args'') = if exists
+ then case map toLower (takeExtension f) of
+ _ | isExecutable -> ("." </> f, args')
+ ".py" -> ("python", f:args')
+ ".hs" -> ("runhaskell", f:args')
+ ".pl" -> ("perl", f:args')
+ ".rb" -> ("ruby", f:args')
+ ".php" -> ("php", f:args')
+ ".js" -> ("node", f:args')
+ _ -> (f, args')
+ else (f, args')
+ unless (exists && isExecutable) $ do
+ mbExe <- findExecutable f'
+ when (isNothing mbExe) $
+ err 83 $ "Error running filter " ++ f ++ ":\n" ++
+ "Could not find executable '" ++ f' ++ "'."
+ env <- getEnvironment
+ let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env
+ (exitcode, outbs) <- E.handle filterException $
+ pipeProcess env' f' args'' $ encode d
+ case exitcode of
+ ExitSuccess -> return $ either error id $ eitherDecode' outbs
+ ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++
+ "Filter returned error status " ++ show ec
+ where filterException :: E.SomeException -> IO a
+ filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++
+ show e
+
+-- | Data structure for command line options.
+data Opt = Opt
+ { optTabStop :: Int -- ^ Number of spaces per tab
+ , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces
+ , optStandalone :: Bool -- ^ Include header, footer
+ , optReader :: Maybe String -- ^ Reader format
+ , optWriter :: Maybe String -- ^ Writer format
+ , optTableOfContents :: Bool -- ^ Include table of contents
+ , optBaseHeaderLevel :: Int -- ^ Base header level
+ , optTemplate :: Maybe FilePath -- ^ Custom template
+ , optVariables :: [(String,String)] -- ^ Template variables to set
+ , optMetadata :: [(String, String)] -- ^ Metadata fields to set
+ , optOutputFile :: FilePath -- ^ Name of output file
+ , optInputFiles :: [FilePath] -- ^ Names of input files
+ , optNumberSections :: Bool -- ^ Number sections in LaTeX
+ , optNumberOffset :: [Int] -- ^ Starting number for sections
+ , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5
+ , optSelfContained :: Bool -- ^ Make HTML accessible offline
+ , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML
+ , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code
+ , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions
+ , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
+ , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
+ , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet
+ , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
+ , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
+ , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters
+ , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub
+ , optTOCDepth :: Int -- ^ Number of levels to include in TOC
+ , optDumpArgs :: Bool -- ^ Output command-line arguments
+ , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
+ , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output
+ , optLogFile :: Maybe FilePath -- ^ File to write JSON log output
+ , optFailIfWarnings :: Bool -- ^ Fail on warnings
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output
+ , optDpi :: Int -- ^ Dpi
+ , optWrapText :: WrapOption -- ^ Options for wrapping text
+ , optColumns :: Int -- ^ Line length in characters
+ , optFilters :: [FilePath] -- ^ Filters to apply
+ , optEmailObfuscation :: ObfuscationMethod
+ , optIdentifierPrefix :: String
+ , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
+ , optDataDir :: Maybe FilePath
+ , optCiteMethod :: CiteMethod -- ^ Method to output cites
+ , optListings :: Bool -- ^ Use listings package for code blocks
+ , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
+ , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine
+ , optSlideLevel :: Maybe Int -- ^ Header level that creates slides
+ , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
+ , optAscii :: Bool -- ^ Use ascii characters only in html
+ , optDefaultImageExtension :: String -- ^ Default image extension
+ , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
+ , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
+ , optFileScope :: Bool -- ^ Parse input files before combining
+ , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX
+ , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX
+ , optTitlePrefix :: Maybe String -- ^ Prefix for title
+ , optCss :: [FilePath] -- ^ CSS files to link to
+ , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before
+ , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body
+ , optIncludeInHeader :: [FilePath] -- ^ Files to include in header
+ }
+
+-- | Defaults for command-line options.
+defaultOpts :: Opt
+defaultOpts = Opt
+ { optTabStop = 4
+ , optPreserveTabs = False
+ , optStandalone = False
+ , optReader = Nothing
+ , optWriter = Nothing
+ , optTableOfContents = False
+ , optBaseHeaderLevel = 1
+ , optTemplate = Nothing
+ , optVariables = []
+ , optMetadata = []
+ , optOutputFile = "-" -- "-" means stdout
+ , optInputFiles = []
+ , optNumberSections = False
+ , optNumberOffset = [0,0,0,0,0,0]
+ , optSectionDivs = False
+ , optIncremental = False
+ , optSelfContained = False
+ , optHtmlQTags = False
+ , optHighlightStyle = Just "pygments"
+ , optTopLevelDivision = TopLevelDefault
+ , optHTMLMathMethod = PlainMath
+ , optReferenceDoc = Nothing
+ , optEpubStylesheet = Nothing
+ , optEpubMetadata = Nothing
+ , optEpubFonts = []
+ , optEpubChapterLevel = 1
+ , optEpubCoverImage = Nothing
+ , optTOCDepth = 3
+ , optDumpArgs = False
+ , optIgnoreArgs = False
+ , optVerbosity = WARNING
+ , optLogFile = Nothing
+ , optFailIfWarnings = False
+ , optReferenceLinks = False
+ , optReferenceLocation = EndOfDocument
+ , optDpi = 96
+ , optWrapText = WrapAuto
+ , optColumns = 72
+ , optFilters = []
+ , optEmailObfuscation = NoObfuscation
+ , optIdentifierPrefix = ""
+ , optIndentedCodeClasses = []
+ , optDataDir = Nothing
+ , optCiteMethod = Citeproc
+ , optListings = False
+ , optLaTeXEngine = "pdflatex"
+ , optLaTeXEngineArgs = []
+ , optSlideLevel = Nothing
+ , optSetextHeaders = True
+ , optAscii = False
+ , optDefaultImageExtension = ""
+ , optExtractMedia = Nothing
+ , optTrackChanges = AcceptChanges
+ , optFileScope = False
+ , optKaTeXStylesheet = Nothing
+ , optKaTeXJS = Nothing
+ , optTitlePrefix = Nothing
+ , optCss = []
+ , optIncludeBeforeBody = []
+ , optIncludeAfterBody = []
+ , optIncludeInHeader = []
+ }
+
+addMetadata :: (String, String) -> Pandoc -> Pandoc
+addMetadata (k, v) (Pandoc meta bs) = Pandoc meta' bs
+ where meta' = case lookupMeta k meta of
+ Nothing -> setMeta k v' meta
+ Just (MetaList xs) ->
+ setMeta k (MetaList (xs ++ [v'])) meta
+ Just x -> setMeta k (MetaList [x, v']) meta
+ v' = readMetaValue v
+
+readMetaValue :: String -> MetaValue
+readMetaValue s = case decode (UTF8.fromString s) of
+ Just (Yaml.String t) -> MetaString $ T.unpack t
+ Just (Yaml.Bool b) -> MetaBool b
+ _ -> MetaString s
+
+-- Determine default reader based on source file extensions
+defaultReaderName :: String -> [FilePath] -> String
+defaultReaderName fallback [] = fallback
+defaultReaderName fallback (x:xs) =
+ case takeExtension (map toLower x) of
+ ".xhtml" -> "html"
+ ".html" -> "html"
+ ".htm" -> "html"
+ ".md" -> "markdown"
+ ".markdown" -> "markdown"
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".rst" -> "rst"
+ ".org" -> "org"
+ ".lhs" -> "markdown+lhs"
+ ".db" -> "docbook"
+ ".opml" -> "opml"
+ ".wiki" -> "mediawiki"
+ ".dokuwiki" -> "dokuwiki"
+ ".textile" -> "textile"
+ ".native" -> "native"
+ ".json" -> "json"
+ ".docx" -> "docx"
+ ".t2t" -> "t2t"
+ ".epub" -> "epub"
+ ".odt" -> "odt"
+ ".pdf" -> "pdf" -- so we get an "unknown reader" error
+ ".doc" -> "doc" -- so we get an "unknown reader" error
+ _ -> defaultReaderName fallback xs
+
+-- Determine default writer based on output file extension
+defaultWriterName :: FilePath -> String
+defaultWriterName "-" = "html" -- no output file
+defaultWriterName x =
+ case takeExtension (map toLower x) of
+ "" -> "markdown" -- empty extension
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".context" -> "context"
+ ".ctx" -> "context"
+ ".rtf" -> "rtf"
+ ".rst" -> "rst"
+ ".s5" -> "s5"
+ ".native" -> "native"
+ ".json" -> "json"
+ ".txt" -> "markdown"
+ ".text" -> "markdown"
+ ".md" -> "markdown"
+ ".markdown" -> "markdown"
+ ".textile" -> "textile"
+ ".lhs" -> "markdown+lhs"
+ ".texi" -> "texinfo"
+ ".texinfo" -> "texinfo"
+ ".db" -> "docbook"
+ ".odt" -> "odt"
+ ".docx" -> "docx"
+ ".epub" -> "epub"
+ ".org" -> "org"
+ ".asciidoc" -> "asciidoc"
+ ".adoc" -> "asciidoc"
+ ".pdf" -> "latex"
+ ".fb2" -> "fb2"
+ ".opml" -> "opml"
+ ".icml" -> "icml"
+ ".tei.xml" -> "tei"
+ ".tei" -> "tei"
+ ['.',y] | y `elem` ['1'..'9'] -> "man"
+ _ -> "html"
+
+-- Transformations of a Pandoc document post-parsing:
+
+extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc
+extractMedia media dir d =
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ extractMediaBag True dir media
+ return $ walk (adjustImagePath dir fps) d
+
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit)
+adjustImagePath _ _ x = x
+
+applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
+applyTransforms transforms d = return $ foldr ($) d transforms
+
+ -- First we check to see if a filter is found. If not, and if it's
+ -- not an absolute path, we check to see whether it's in `userdir/filters`.
+ -- If not, we leave it unchanged.
+expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath
+expandFilterPath mbDatadir fp = liftIO $ do
+ fpExists <- doesFileExist fp
+ if fpExists
+ then return fp
+ else case mbDatadir of
+ Just datadir | isRelative fp -> do
+ let filterPath = (datadir </> "filters" </> fp)
+ filterPathExists <- doesFileExist filterPath
+ if filterPathExists
+ then return filterPath
+ else return fp
+ _ -> return fp
+
+applyFilters :: MonadIO m
+ => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
+applyFilters mbDatadir filters args d = do
+ expandedFilters <- mapM (expandFilterPath mbDatadir) filters
+ foldrM ($) d $ map (flip externalFilter args) expandedFilters
+
+readSource :: MonadIO m => FilePath -> m String
+readSource "-" = liftIO UTF8.getContents
+readSource src = case parseURI src of
+ Just u | uriScheme u `elem` ["http:","https:"] ->
+ readURI src
+ | uriScheme u == "file:" ->
+ liftIO $ UTF8.readFile (uriPath u)
+ _ -> liftIO $ UTF8.readFile src
+
+readURI :: MonadIO m => FilePath -> m String
+readURI src = do
+ res <- liftIO $ openURL src
+ case res of
+ Left e -> liftIO $ throwIO e
+ Right (bs,_) -> return $ UTF8.toString bs
+
+readFile' :: MonadIO m => FilePath -> m B.ByteString
+readFile' "-" = liftIO $ B.getContents
+readFile' f = liftIO $ B.readFile f
+
+writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m ()
+writeFnBinary "-" = liftIO . B.putStr
+writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f)
+
+writerFn :: MonadIO m => FilePath -> String -> m ()
+writerFn "-" = liftIO . UTF8.putStr
+writerFn f = liftIO . UTF8.writeFile f
+
+lookupHighlightStyle :: Maybe String -> IO (Maybe Style)
+lookupHighlightStyle Nothing = return Nothing
+lookupHighlightStyle (Just s) =
+ case lookup (map toLower s) highlightingStyles of
+ Just sty -> return (Just sty)
+ Nothing -> err 68 $ "Unknown highlight-style " ++ s
+
+-- | A list of functions, each transforming the options data structure
+-- in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
+ [ Option "fr" ["from","read"]
+ (ReqArg
+ (\arg opt -> return opt { optReader = Just arg })
+ "FORMAT")
+ ""
+
+ , Option "tw" ["to","write"]
+ (ReqArg
+ (\arg opt -> return opt { optWriter = Just arg })
+ "FORMAT")
+ ""
+
+ , Option "o" ["output"]
+ (ReqArg
+ (\arg opt -> return opt { optOutputFile = arg })
+ "FILENAME")
+ "" -- "Name of output file"
+
+ , Option "" ["data-dir"]
+ (ReqArg
+ (\arg opt -> return opt { optDataDir = Just arg })
+ "DIRECTORY") -- "Directory containing pandoc data files."
+ ""
+
+ , Option "" ["base-header-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 && t < 6 -> do
+ return opt{ optBaseHeaderLevel = t }
+ _ -> err 19
+ "base-header-level must be 1-5")
+ "NUMBER")
+ "" -- "Headers base level"
+
+ , Option "" ["indented-code-classes"]
+ (ReqArg
+ (\arg opt -> return opt { optIndentedCodeClasses = words $
+ map (\c -> if c == ',' then ' ' else c) arg })
+ "STRING")
+ "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
+
+ , Option "F" ["filter"]
+ (ReqArg
+ (\arg opt -> return opt { optFilters = arg : optFilters opt })
+ "PROGRAM")
+ "" -- "External JSON filter"
+
+ , Option "p" ["preserve-tabs"]
+ (NoArg
+ (\opt -> return opt { optPreserveTabs = True }))
+ "" -- "Preserve tabs instead of converting to spaces"
+
+ , Option "" ["tab-stop"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optTabStop = t }
+ _ -> err 31
+ "tab-stop must be a number greater than 0")
+ "NUMBER")
+ "" -- "Tab stop (default 4)"
+
+ , Option "" ["track-changes"]
+ (ReqArg
+ (\arg opt -> do
+ action <- case arg of
+ "accept" -> return AcceptChanges
+ "reject" -> return RejectChanges
+ "all" -> return AllChanges
+ _ -> err 6
+ ("Unknown option for track-changes: " ++ arg)
+ return opt { optTrackChanges = action })
+ "accept|reject|all")
+ "" -- "Accepting or reject MS Word track-changes.""
+
+ , Option "" ["file-scope"]
+ (NoArg
+ (\opt -> return opt { optFileScope = True }))
+ "" -- "Parse input files before combining"
+
+ , Option "" ["extract-media"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optExtractMedia = Just arg })
+ "PATH")
+ "" -- "Directory to which to extract embedded media"
+
+ , Option "s" ["standalone"]
+ (NoArg
+ (\opt -> return opt { optStandalone = True }))
+ "" -- "Include needed header and footer on output"
+
+ , Option "" ["template"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optTemplate = Just arg,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "Use custom template"
+
+ , Option "M" ["metadata"]
+ (ReqArg
+ (\arg opt -> do
+ let (key, val) = splitField arg
+ return opt{ optMetadata = (key, val) : optMetadata opt })
+ "KEY[:VALUE]")
+ ""
+
+ , Option "V" ["variable"]
+ (ReqArg
+ (\arg opt -> do
+ let (key, val) = splitField arg
+ return opt{ optVariables = (key, val) : optVariables opt })
+ "KEY[:VALUE]")
+ ""
+
+ , Option "D" ["print-default-template"]
+ (ReqArg
+ (\arg _ -> do
+ templ <- getDefaultTemplate Nothing arg
+ case templ of
+ Right t -> UTF8.hPutStr stdout t
+ Left e -> error $ show e
+ exitSuccess)
+ "FORMAT")
+ "" -- "Print default template for FORMAT"
+
+ , Option "" ["print-default-data-file"]
+ (ReqArg
+ (\arg _ -> do
+ readDataFile Nothing arg >>= BS.hPutStr stdout
+ exitSuccess)
+ "FILE")
+ "" -- "Print default data file"
+
+ , Option "" ["dpi"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optDpi = t }
+ _ -> err 31
+ "dpi must be a number greater than 0")
+ "NUMBER")
+ "" -- "Dpi (default 96)"
+
+ , Option "" ["wrap"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of
+ Just o -> return opt { optWrapText = o }
+ Nothing -> err 77 "--wrap must be auto, none, or preserve")
+ "auto|none|preserve")
+ "" -- "Option for wrapping text in output"
+
+ , Option "" ["columns"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optColumns = t }
+ _ -> err 33
+ "columns must be a number greater than 0")
+ "NUMBER")
+ "" -- "Length of line in characters"
+
+ , Option "" ["toc", "table-of-contents"]
+ (NoArg
+ (\opt -> return opt { optTableOfContents = True }))
+ "" -- "Include table of contents"
+
+ , Option "" ["toc-depth"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optTOCDepth = t }
+ _ -> err 57
+ "TOC level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Number of levels to include in TOC"
+
+ , Option "" ["no-highlight"]
+ (NoArg
+ (\opt -> return opt { optHighlightStyle = Nothing }))
+ "" -- "Don't highlight source code"
+
+ , Option "" ["highlight-style"]
+ (ReqArg
+ (\arg opt -> return opt{ optHighlightStyle = Just arg })
+ "STYLE")
+ "" -- "Style for highlighted code"
+
+ , Option "H" ["include-in-header"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeInHeader =
+ arg : optIncludeInHeader opt,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to include at end of header (implies -s)"
+
+ , Option "B" ["include-before-body"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeBeforeBody =
+ arg : optIncludeBeforeBody opt,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to include before document body"
+
+ , Option "A" ["include-after-body"]
+ (ReqArg
+ (\arg opt -> return opt{ optIncludeAfterBody =
+ arg : optIncludeAfterBody opt,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to include after document body"
+
+ , Option "" ["self-contained"]
+ (NoArg
+ (\opt -> return opt { optSelfContained = True,
+ optStandalone = True }))
+ "" -- "Make slide shows include all the needed js and css"
+
+ , Option "" ["html-q-tags"]
+ (NoArg
+ (\opt ->
+ return opt { optHtmlQTags = True }))
+ "" -- "Use <q> tags for quotes in HTML"
+
+ , Option "" ["ascii"]
+ (NoArg
+ (\opt -> return opt { optAscii = True }))
+ "" -- "Use ascii characters only in HTML output"
+
+ , Option "" ["reference-links"]
+ (NoArg
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
+
+ , Option "" ["reference-location"]
+ (ReqArg
+ (\arg opt -> do
+ action <- case arg of
+ "block" -> return EndOfBlock
+ "section" -> return EndOfSection
+ "document" -> return EndOfDocument
+ _ -> err 6
+ ("Unknown option for reference-location: " ++ arg)
+ return opt { optReferenceLocation = action })
+ "block|section|document")
+ "" -- "Accepting or reject MS Word track-changes.""
+
+ , Option "" ["atx-headers"]
+ (NoArg
+ (\opt -> return opt { optSetextHeaders = False } ))
+ "" -- "Use atx-style headers for markdown"
+
+ , Option "" ["top-level-division"]
+ (ReqArg
+ (\arg opt -> do
+ let tldName = "TopLevel" ++ uppercaseFirstLetter arg
+ case safeRead tldName of
+ Just tlDiv -> return opt { optTopLevelDivision = tlDiv }
+ _ -> err 76 ("Top-level division must be " ++
+ "section, chapter, part, or default"))
+ "section|chapter|part")
+ "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook"
+
+ , Option "N" ["number-sections"]
+ (NoArg
+ (\opt -> return opt { optNumberSections = True }))
+ "" -- "Number sections in LaTeX"
+
+ , Option "" ["number-offset"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead ('[':arg ++ "]") of
+ Just ns -> return opt { optNumberOffset = ns,
+ optNumberSections = True }
+ _ -> err 57 "could not parse number-offset")
+ "NUMBERS")
+ "" -- "Starting number for sections, subsections, etc."
+
+ , Option "" ["listings"]
+ (NoArg
+ (\opt -> return opt { optListings = True }))
+ "" -- "Use listings package for LaTeX code blocks"
+
+ , Option "i" ["incremental"]
+ (NoArg
+ (\opt -> return opt { optIncremental = True }))
+ "" -- "Make list items display incrementally in Slidy/Slideous/S5"
+
+ , Option "" ["slide-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optSlideLevel = Just t }
+ _ -> err 39
+ "slide level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Force header level for slides"
+
+ , Option "" ["section-divs"]
+ (NoArg
+ (\opt -> return opt { optSectionDivs = True }))
+ "" -- "Put sections in div tags in HTML"
+
+ , Option "" ["default-image-extension"]
+ (ReqArg
+ (\arg opt -> return opt { optDefaultImageExtension = arg })
+ "extension")
+ "" -- "Default extension for extensionless images"
+
+ , Option "" ["email-obfuscation"]
+ (ReqArg
+ (\arg opt -> do
+ method <- case arg of
+ "references" -> return ReferenceObfuscation
+ "javascript" -> return JavascriptObfuscation
+ "none" -> return NoObfuscation
+ _ -> err 6
+ ("Unknown obfuscation method: " ++ arg)
+ return opt { optEmailObfuscation = method })
+ "none|javascript|references")
+ "" -- "Method for obfuscating email in HTML"
+
+ , Option "" ["id-prefix"]
+ (ReqArg
+ (\arg opt -> return opt { optIdentifierPrefix = arg })
+ "STRING")
+ "" -- "Prefix to add to automatically generated HTML identifiers"
+
+ , Option "T" ["title-prefix"]
+ (ReqArg
+ (\arg opt -> do
+ let newvars = ("title-prefix", arg) : optVariables opt
+ return opt { optVariables = newvars,
+ optStandalone = True })
+ "STRING")
+ "" -- "String to prefix to HTML window title"
+
+ , Option "c" ["css"]
+ (ReqArg
+ (\arg opt -> return opt{ optCss = arg : optCss opt })
+ -- add new link to end, so it is included in proper order
+ "URL")
+ "" -- "Link to CSS style sheet"
+
+ , Option "" ["reference-doc"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optReferenceDoc = Just arg })
+ "FILENAME")
+ "" -- "Path of custom reference doc"
+
+ , Option "" ["epub-stylesheet"]
+ (ReqArg
+ (\arg opt -> return opt { optEpubStylesheet = Just arg })
+ "FILENAME")
+ "" -- "Path of epub.css"
+
+ , Option "" ["epub-cover-image"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optVariables =
+ ("epub-cover-image", arg) : optVariables opt })
+ "FILENAME")
+ "" -- "Path of epub cover image"
+
+ , Option "" ["epub-metadata"]
+ (ReqArg
+ (\arg opt -> return opt { optEpubMetadata = Just arg })
+ "FILENAME")
+ "" -- "Path of epub metadata file"
+
+ , Option "" ["epub-embed-font"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optEpubFonts = arg : optEpubFonts opt })
+ "FILE")
+ "" -- "Directory of fonts to embed"
+
+ , Option "" ["epub-chapter-level"]
+ (ReqArg
+ (\arg opt ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
+ return opt { optEpubChapterLevel = t }
+ _ -> err 59
+ "chapter level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Header level at which to split chapters in EPUB"
+
+ , Option "" ["latex-engine"]
+ (ReqArg
+ (\arg opt -> do
+ let b = takeBaseName arg
+ if b `elem` ["pdflatex", "lualatex", "xelatex"]
+ then return opt { optLaTeXEngine = arg }
+ else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.")
+ "PROGRAM")
+ "" -- "Name of latex program to use in generating PDF"
+
+ , Option "" ["latex-engine-opt"]
+ (ReqArg
+ (\arg opt -> do
+ let oldArgs = optLaTeXEngineArgs opt
+ return opt { optLaTeXEngineArgs = arg : oldArgs })
+ "STRING")
+ "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used"
+
+ , Option "" ["bibliography"]
+ (ReqArg
+ (\arg opt -> return opt{ optMetadata =
+ ("bibliography", arg) : optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["csl"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optMetadata =
+ ("csl", arg) : optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["citation-abbreviations"]
+ (ReqArg
+ (\arg opt ->
+ return opt{ optMetadata =
+ ("citation-abbreviations", arg): optMetadata opt })
+ "FILE")
+ ""
+
+ , Option "" ["natbib"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Natbib }))
+ "" -- "Use natbib cite commands in LaTeX output"
+
+ , Option "" ["biblatex"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Biblatex }))
+ "" -- "Use biblatex cite commands in LaTeX output"
+
+ , Option "m" ["latexmathml", "asciimathml"]
+ (OptArg
+ (\arg opt ->
+ return opt { optHTMLMathMethod = LaTeXMathML arg })
+ "URL")
+ "" -- "Use LaTeXMathML script in html output"
+
+ , Option "" ["mathml"]
+ (NoArg
+ (\opt ->
+ return opt { optHTMLMathMethod = MathML }))
+ "" -- "Use mathml for HTML math"
+
+ , Option "" ["mimetex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u ++ "?"
+ Nothing -> "/cgi-bin/mimetex.cgi?"
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use mimetex for HTML math"
+
+ , Option "" ["webtex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use web service for HTML math"
+
+ , Option "" ["jsmath"]
+ (OptArg
+ (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
+ "URL")
+ "" -- "Use jsMath for HTML math"
+
+ , Option "" ["mathjax"]
+ (OptArg
+ (\arg opt -> do
+ let url' = fromMaybe "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML-full" arg
+ return opt { optHTMLMathMethod = MathJax url'})
+ "URL")
+ "" -- "Use MathJax for HTML math"
+ , Option "" ["katex"]
+ (OptArg
+ (\arg opt ->
+ return opt
+ { optKaTeXJS =
+ arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"})
+ "URL")
+ "" -- Use KaTeX for HTML Math
+
+ , Option "" ["katex-stylesheet"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optKaTeXStylesheet = Just arg })
+ "URL")
+ "" -- Set the KaTeX Stylesheet location
+
+ , Option "" ["gladtex"]
+ (NoArg
+ (\opt -> return opt { optHTMLMathMethod = GladTeX }))
+ "" -- "Use gladtex for HTML math"
+
+ , Option "" ["trace"]
+ (NoArg
+ (\opt -> return opt { optVerbosity = DEBUG }))
+ "" -- "Turn on diagnostic tracing in readers."
+
+ , Option "" ["dump-args"]
+ (NoArg
+ (\opt -> return opt { optDumpArgs = True }))
+ "" -- "Print output filename and arguments to stdout."
+
+ , Option "" ["ignore-args"]
+ (NoArg
+ (\opt -> return opt { optIgnoreArgs = True }))
+ "" -- "Ignore command-line arguments."
+
+ , Option "" ["verbose"]
+ (NoArg
+ (\opt -> return opt { optVerbosity = INFO }))
+ "" -- "Verbose diagnostic output."
+
+ , Option "" ["quiet"]
+ (NoArg
+ (\opt -> return opt { optVerbosity = ERROR }))
+ "" -- "Suppress warnings."
+
+ , Option "" ["fail-if-warnings"]
+ (NoArg
+ (\opt -> return opt { optFailIfWarnings = True }))
+ "" -- "Exit with error status if there were warnings."
+
+ , Option "" ["log"]
+ (ReqArg
+ (\arg opt -> return opt{ optLogFile = Just arg })
+ "FILE")
+ "" -- "Log messages in JSON format to this file."
+
+ , Option "" ["bash-completion"]
+ (NoArg
+ (\_ -> do
+ ddir <- getDataDir
+ tpl <- readDataFileUTF8 Nothing "bash_completion.tpl"
+ let optnames (Option shorts longs _ _) =
+ map (\c -> ['-',c]) shorts ++
+ map ("--" ++) longs
+ let allopts = unwords (concatMap optnames options)
+ UTF8.hPutStrLn stdout $ printf tpl allopts
+ (unwords readers'names)
+ (unwords writers'names)
+ (unwords $ map fst highlightingStyles)
+ ddir
+ exitSuccess ))
+ "" -- "Print bash completion script"
+
+ , Option "" ["list-input-formats"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout) readers'names
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-output-formats"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout) writers'names
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-extensions"]
+ (NoArg
+ (\_ -> do
+ let showExt x = drop 4 (show x) ++
+ if extensionEnabled x pandocExtensions
+ then " +"
+ else " -"
+ mapM_ (UTF8.hPutStrLn stdout . showExt)
+ ([minBound..maxBound] :: [Extension])
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-highlight-languages"]
+ (NoArg
+ (\_ -> do
+ let langs = [ T.unpack (T.toLower (sShortname s))
+ | s <- M.elems defaultSyntaxMap
+ , sShortname s `notElem`
+ [T.pack "Alert", T.pack "Alert_indent"]
+ ]
+ mapM_ (UTF8.hPutStrLn stdout) langs
+ exitSuccess ))
+ ""
+
+ , Option "" ["list-highlight-styles"]
+ (NoArg
+ (\_ -> do
+ mapM_ (UTF8.hPutStrLn stdout) $
+ map fst highlightingStyles
+ exitSuccess ))
+ ""
+
+ , Option "v" ["version"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ defaultDatadir <- E.catch
+ (getAppUserDataDirectory "pandoc")
+ (\e -> let _ = (e :: E.SomeException)
+ in return "")
+ UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++
+ compileInfo ++ "\nDefault user data directory: " ++
+ defaultDatadir ++ copyrightMessage)
+ exitSuccess ))
+ "" -- "Print version"
+
+ , Option "h" ["help"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ UTF8.hPutStr stdout (usageMessage prg options)
+ exitSuccess ))
+ "" -- "Show help"
+
+ ]
+
+-- Returns usage message
+usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
+usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
+
+copyrightMessage :: String
+copyrightMessage = intercalate "\n" [
+ "",
+ "Copyright (C) 2006-2017 John MacFarlane",
+ "Web: http://pandoc.org",
+ "This is free software; see the source for copying conditions.",
+ "There is no warranty, not even for merchantability or fitness",
+ "for a particular purpose." ]
+
+compileInfo :: String
+compileInfo =
+ "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++
+ VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting
+
+handleUnrecognizedOption :: String -> [String] -> [String]
+handleUnrecognizedOption "--smart" =
+ (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++
+ "For example: pandoc -f markdown+smart -t markdown-smart.") :)
+handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
+handleUnrecognizedOption "--old-dashes" =
+ ("--old-dashes has been removed. Use +old_dashes extension instead." :)
+handleUnrecognizedOption "--no-wrap" =
+ ("--no-wrap has been removed. Use --wrap=none instead." :)
+handleUnrecognizedOption "--chapters" =
+ ("--chapters has been removed. Use --top-level-division=chapter instead." :)
+handleUnrecognizedOption "--reference-docx" =
+ ("--reference-docx has been removed. Use --reference-doc instead." :)
+handleUnrecognizedOption "--reference-odt" =
+ ("--reference-odt has been removed. Use --reference-doc instead." :)
+handleUnrecognizedOption "--parse-raw" =
+ (("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n") :)
+handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
+handleUnrecognizedOption x =
+ (("Unknown option " ++ x ++ ".") :)
+
+uppercaseFirstLetter :: String -> String
+uppercaseFirstLetter (c:cs) = toUpper c : cs
+uppercaseFirstLetter [] = []
+
+readers'names :: [String]
+readers'names = sort (map fst (readers :: [(String, Reader PandocIO)]))
+
+writers'names :: [String]
+writers'names = sort (map fst (writers :: [(String, Writer PandocIO)]))
+
+splitField :: String -> (String, String)
+splitField s =
+ case break (`elem` ":=") s of
+ (k,_:v) -> (k,v)
+ (k,[]) -> (k,"true")
+
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
new file mode 100644
index 000000000..8eb1ba663
--- /dev/null
+++ b/src/Text/Pandoc/Asciify.hs
@@ -0,0 +1,422 @@
+{-
+Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Asciify
+ Copyright : Copyright (C) 2013-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Function to convert accented latin letters to their unaccented
+ascii equivalents (used in constructing HTML identifiers).
+-}
+module Text.Pandoc.Asciify (toAsciiChar)
+where
+import qualified Data.Map as M
+import Data.Char (isAscii)
+
+toAsciiChar :: Char -> Maybe Char
+toAsciiChar c | isAscii c = Just c
+ | otherwise = M.lookup c asciiMap
+
+asciiMap :: M.Map Char Char
+asciiMap = M.fromList
+ [('\192','A')
+ ,('\193','A')
+ ,('\194','A')
+ ,('\195','A')
+ ,('\196','A')
+ ,('\197','A')
+ ,('\199','C')
+ ,('\200','E')
+ ,('\201','E')
+ ,('\202','E')
+ ,('\203','E')
+ ,('\204','I')
+ ,('\205','I')
+ ,('\206','I')
+ ,('\207','I')
+ ,('\209','N')
+ ,('\210','O')
+ ,('\211','O')
+ ,('\212','O')
+ ,('\213','O')
+ ,('\214','O')
+ ,('\217','U')
+ ,('\218','U')
+ ,('\219','U')
+ ,('\220','U')
+ ,('\221','Y')
+ ,('\224','a')
+ ,('\225','a')
+ ,('\226','a')
+ ,('\227','a')
+ ,('\228','a')
+ ,('\229','a')
+ ,('\231','c')
+ ,('\232','e')
+ ,('\233','e')
+ ,('\234','e')
+ ,('\235','e')
+ ,('\236','i')
+ ,('\237','i')
+ ,('\238','i')
+ ,('\239','i')
+ ,('\241','n')
+ ,('\242','o')
+ ,('\243','o')
+ ,('\244','o')
+ ,('\245','o')
+ ,('\246','o')
+ ,('\249','u')
+ ,('\250','u')
+ ,('\251','u')
+ ,('\252','u')
+ ,('\253','y')
+ ,('\255','y')
+ ,('\256','A')
+ ,('\257','a')
+ ,('\258','A')
+ ,('\259','a')
+ ,('\260','A')
+ ,('\261','a')
+ ,('\262','C')
+ ,('\263','c')
+ ,('\264','C')
+ ,('\265','c')
+ ,('\266','C')
+ ,('\267','c')
+ ,('\268','C')
+ ,('\269','c')
+ ,('\270','D')
+ ,('\271','d')
+ ,('\274','E')
+ ,('\275','e')
+ ,('\276','E')
+ ,('\277','e')
+ ,('\278','E')
+ ,('\279','e')
+ ,('\280','E')
+ ,('\281','e')
+ ,('\282','E')
+ ,('\283','e')
+ ,('\284','G')
+ ,('\285','g')
+ ,('\286','G')
+ ,('\287','g')
+ ,('\288','G')
+ ,('\289','g')
+ ,('\290','G')
+ ,('\291','g')
+ ,('\292','H')
+ ,('\293','h')
+ ,('\296','I')
+ ,('\297','i')
+ ,('\298','I')
+ ,('\299','i')
+ ,('\300','I')
+ ,('\301','i')
+ ,('\302','I')
+ ,('\303','i')
+ ,('\304','I')
+ ,('\308','J')
+ ,('\309','j')
+ ,('\310','K')
+ ,('\311','k')
+ ,('\313','L')
+ ,('\314','l')
+ ,('\315','L')
+ ,('\316','l')
+ ,('\317','L')
+ ,('\318','l')
+ ,('\323','N')
+ ,('\324','n')
+ ,('\325','N')
+ ,('\326','n')
+ ,('\327','N')
+ ,('\328','n')
+ ,('\332','O')
+ ,('\333','o')
+ ,('\334','O')
+ ,('\335','o')
+ ,('\336','O')
+ ,('\337','o')
+ ,('\340','R')
+ ,('\341','r')
+ ,('\342','R')
+ ,('\343','r')
+ ,('\344','R')
+ ,('\345','r')
+ ,('\346','S')
+ ,('\347','s')
+ ,('\348','S')
+ ,('\349','s')
+ ,('\350','S')
+ ,('\351','s')
+ ,('\352','S')
+ ,('\353','s')
+ ,('\354','T')
+ ,('\355','t')
+ ,('\356','T')
+ ,('\357','t')
+ ,('\360','U')
+ ,('\361','u')
+ ,('\362','U')
+ ,('\363','u')
+ ,('\364','U')
+ ,('\365','u')
+ ,('\366','U')
+ ,('\367','u')
+ ,('\368','U')
+ ,('\369','u')
+ ,('\370','U')
+ ,('\371','u')
+ ,('\372','W')
+ ,('\373','w')
+ ,('\374','Y')
+ ,('\375','y')
+ ,('\376','Y')
+ ,('\377','Z')
+ ,('\378','z')
+ ,('\379','Z')
+ ,('\380','z')
+ ,('\381','Z')
+ ,('\382','z')
+ ,('\416','O')
+ ,('\417','o')
+ ,('\431','U')
+ ,('\432','u')
+ ,('\461','A')
+ ,('\462','a')
+ ,('\463','I')
+ ,('\464','i')
+ ,('\465','O')
+ ,('\466','o')
+ ,('\467','U')
+ ,('\468','u')
+ ,('\486','G')
+ ,('\487','g')
+ ,('\488','K')
+ ,('\489','k')
+ ,('\490','O')
+ ,('\491','o')
+ ,('\496','j')
+ ,('\500','G')
+ ,('\501','g')
+ ,('\504','N')
+ ,('\505','n')
+ ,('\512','A')
+ ,('\513','a')
+ ,('\514','A')
+ ,('\515','a')
+ ,('\516','E')
+ ,('\517','e')
+ ,('\518','E')
+ ,('\519','e')
+ ,('\520','I')
+ ,('\521','i')
+ ,('\522','I')
+ ,('\523','i')
+ ,('\524','O')
+ ,('\525','o')
+ ,('\526','O')
+ ,('\527','o')
+ ,('\528','R')
+ ,('\529','r')
+ ,('\530','R')
+ ,('\531','r')
+ ,('\532','U')
+ ,('\533','u')
+ ,('\534','U')
+ ,('\535','u')
+ ,('\536','S')
+ ,('\537','s')
+ ,('\538','T')
+ ,('\539','t')
+ ,('\542','H')
+ ,('\543','h')
+ ,('\550','A')
+ ,('\551','a')
+ ,('\552','E')
+ ,('\553','e')
+ ,('\558','O')
+ ,('\559','o')
+ ,('\562','Y')
+ ,('\563','y')
+ ,('\894',';')
+ ,('\7680','A')
+ ,('\7681','a')
+ ,('\7682','B')
+ ,('\7683','b')
+ ,('\7684','B')
+ ,('\7685','b')
+ ,('\7686','B')
+ ,('\7687','b')
+ ,('\7690','D')
+ ,('\7691','d')
+ ,('\7692','D')
+ ,('\7693','d')
+ ,('\7694','D')
+ ,('\7695','d')
+ ,('\7696','D')
+ ,('\7697','d')
+ ,('\7698','D')
+ ,('\7699','d')
+ ,('\7704','E')
+ ,('\7705','e')
+ ,('\7706','E')
+ ,('\7707','e')
+ ,('\7710','F')
+ ,('\7711','f')
+ ,('\7712','G')
+ ,('\7713','g')
+ ,('\7714','H')
+ ,('\7715','h')
+ ,('\7716','H')
+ ,('\7717','h')
+ ,('\7718','H')
+ ,('\7719','h')
+ ,('\7720','H')
+ ,('\7721','h')
+ ,('\7722','H')
+ ,('\7723','h')
+ ,('\7724','I')
+ ,('\7725','i')
+ ,('\7728','K')
+ ,('\7729','k')
+ ,('\7730','K')
+ ,('\7731','k')
+ ,('\7732','K')
+ ,('\7733','k')
+ ,('\7734','L')
+ ,('\7735','l')
+ ,('\7738','L')
+ ,('\7739','l')
+ ,('\7740','L')
+ ,('\7741','l')
+ ,('\7742','M')
+ ,('\7743','m')
+ ,('\7744','M')
+ ,('\7745','m')
+ ,('\7746','M')
+ ,('\7747','m')
+ ,('\7748','N')
+ ,('\7749','n')
+ ,('\7750','N')
+ ,('\7751','n')
+ ,('\7752','N')
+ ,('\7753','n')
+ ,('\7754','N')
+ ,('\7755','n')
+ ,('\7764','P')
+ ,('\7765','p')
+ ,('\7766','P')
+ ,('\7767','p')
+ ,('\7768','R')
+ ,('\7769','r')
+ ,('\7770','R')
+ ,('\7771','r')
+ ,('\7774','R')
+ ,('\7775','r')
+ ,('\7776','S')
+ ,('\7777','s')
+ ,('\7778','S')
+ ,('\7779','s')
+ ,('\7786','T')
+ ,('\7787','t')
+ ,('\7788','T')
+ ,('\7789','t')
+ ,('\7790','T')
+ ,('\7791','t')
+ ,('\7792','T')
+ ,('\7793','t')
+ ,('\7794','U')
+ ,('\7795','u')
+ ,('\7796','U')
+ ,('\7797','u')
+ ,('\7798','U')
+ ,('\7799','u')
+ ,('\7804','V')
+ ,('\7805','v')
+ ,('\7806','V')
+ ,('\7807','v')
+ ,('\7808','W')
+ ,('\7809','w')
+ ,('\7810','W')
+ ,('\7811','w')
+ ,('\7812','W')
+ ,('\7813','w')
+ ,('\7814','W')
+ ,('\7815','w')
+ ,('\7816','W')
+ ,('\7817','w')
+ ,('\7818','X')
+ ,('\7819','x')
+ ,('\7820','X')
+ ,('\7821','x')
+ ,('\7822','Y')
+ ,('\7823','y')
+ ,('\7824','Z')
+ ,('\7825','z')
+ ,('\7826','Z')
+ ,('\7827','z')
+ ,('\7828','Z')
+ ,('\7829','z')
+ ,('\7830','h')
+ ,('\7831','t')
+ ,('\7832','w')
+ ,('\7833','y')
+ ,('\7840','A')
+ ,('\7841','a')
+ ,('\7842','A')
+ ,('\7843','a')
+ ,('\7864','E')
+ ,('\7865','e')
+ ,('\7866','E')
+ ,('\7867','e')
+ ,('\7868','E')
+ ,('\7869','e')
+ ,('\7880','I')
+ ,('\7881','i')
+ ,('\7882','I')
+ ,('\7883','i')
+ ,('\7884','O')
+ ,('\7885','o')
+ ,('\7886','O')
+ ,('\7887','o')
+ ,('\7908','U')
+ ,('\7909','u')
+ ,('\7910','U')
+ ,('\7911','u')
+ ,('\7922','Y')
+ ,('\7923','y')
+ ,('\7924','Y')
+ ,('\7925','y')
+ ,('\7926','Y')
+ ,('\7927','y')
+ ,('\7928','Y')
+ ,('\7929','y')
+ ,('\8175','`')
+ ,('\8490','K')
+ ,('\8800','=')
+ ,('\8814','<')
+ ,('\8815','>')
+ ]
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
new file mode 100644
index 000000000..f479ed9d0
--- /dev/null
+++ b/src/Text/Pandoc/CSS.hs
@@ -0,0 +1,43 @@
+module Text.Pandoc.CSS ( foldOrElse
+ , pickStyleAttrProps
+ , pickStylesToKVs
+ )
+where
+
+import Text.Pandoc.Shared (trim)
+import Text.Parsec
+import Text.Parsec.String
+
+ruleParser :: Parser (String, String)
+ruleParser = do
+ p <- many1 (noneOf ":") <* char ':'
+ v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces
+ return (trim p, trim v)
+
+styleAttrParser :: Parser [(String, String)]
+styleAttrParser = many1 ruleParser
+
+orElse :: Eq a => a -> a -> a -> a
+orElse v x y = if v == x then y else x
+
+foldOrElse :: Eq a => a -> [a] -> a
+foldOrElse v xs = foldr (orElse v) v xs
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Right x) = Just x
+eitherToMaybe _ = Nothing
+
+-- | takes a list of keys/properties and a CSS string and
+-- returns the corresponding key-value-pairs.
+pickStylesToKVs :: [String] -> String -> [(String, String)]
+pickStylesToKVs props styleAttr =
+ case parse styleAttrParser "" styleAttr of
+ Left _ -> []
+ Right styles -> filter (\s -> fst s `elem` props) styles
+
+-- | takes a list of key/property synonyms and a CSS string and maybe
+-- returns the value of the first match (in order of the supplied list)
+pickStyleAttrProps :: [String] -> String -> Maybe String
+pickStyleAttrProps lookupProps styleAttr = do
+ styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
+ foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
new file mode 100644
index 000000000..fb148666c
--- /dev/null
+++ b/src/Text/Pandoc/Class.hs
@@ -0,0 +1,539 @@
+{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances,
+FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts #-}
+
+{-
+Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Class
+ Copyright : Copyright (C) 2016 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Typeclass for pandoc readers and writers, allowing both IO and pure instances.
+-}
+
+module Text.Pandoc.Class ( PandocMonad(..)
+ , CommonState(..)
+ , PureState(..)
+ , getPureState
+ , getsPureState
+ , putPureState
+ , modifyPureState
+ , getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , getLog
+ , setVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , fetchItem
+ , getInputFiles
+ , getOutputFile
+ , PandocIO(..)
+ , PandocPure(..)
+ , FileTree(..)
+ , FileInfo(..)
+ , runIO
+ , runIOorExplode
+ , runPure
+ , withMediaBag
+ ) where
+
+import Prelude hiding (readFile)
+import System.Random (StdGen, next, mkStdGen)
+import qualified System.Random as IO (newStdGen)
+import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
+import Data.Unique (hashUnique)
+import qualified Data.Unique as IO (newUnique)
+import qualified Text.Pandoc.Shared as IO ( readDataFile
+ , openURL )
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Compat.Time (UTCTime)
+import Text.Pandoc.Logging
+import Text.Parsec (ParsecT)
+import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
+import Text.Pandoc.MIME (MimeType, getMimeType)
+import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
+ , posixSecondsToUTCTime
+ , POSIXTime )
+import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
+import Network.URI ( escapeURIString, nonStrictRelativeTo,
+ unEscapeString, parseURIReference, isAllowedInURI,
+ parseURI, URI(..) )
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import qualified Text.Pandoc.MediaBag as MB
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified System.Environment as IO (lookupEnv)
+import System.FilePath.Glob (match, compile)
+import System.FilePath ((</>), takeExtension, dropExtension)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.Directory as IO (getModificationTime)
+import Control.Monad as M (fail)
+import Control.Monad.Reader (ReaderT)
+import Control.Monad.State
+import Control.Monad.Except
+import Control.Monad.Writer (WriterT)
+import Control.Monad.RWS (RWST)
+import Data.Word (Word8)
+import Data.Default
+import System.IO.Error
+import System.IO (stderr)
+import qualified Data.Map as M
+import Text.Pandoc.Error
+import Text.Printf (printf)
+
+class (Functor m, Applicative m, Monad m, MonadError PandocError m)
+ => PandocMonad m where
+ lookupEnv :: String -> m (Maybe String)
+ getCurrentTime :: m UTCTime
+ getCurrentTimeZone :: m TimeZone
+ newStdGen :: m StdGen
+ newUniqueHash :: m Int
+ openURL :: String -> m (B.ByteString, Maybe MimeType)
+ readFileLazy :: FilePath -> m BL.ByteString
+ readFileStrict :: FilePath -> m B.ByteString
+ readDataFile :: Maybe FilePath
+ -> FilePath
+ -> m B.ByteString
+ glob :: String -> m [FilePath]
+ getModificationTime :: FilePath -> m UTCTime
+ getCommonState :: m CommonState
+ putCommonState :: CommonState -> m ()
+
+ getsCommonState :: (CommonState -> a) -> m a
+ getsCommonState f = f <$> getCommonState
+
+ modifyCommonState :: (CommonState -> CommonState) -> m ()
+ modifyCommonState f = getCommonState >>= putCommonState . f
+
+ logOutput :: LogMessage -> m ()
+
+-- Functions defined for all PandocMonad instances
+
+setVerbosity :: PandocMonad m => Verbosity -> m ()
+setVerbosity verbosity =
+ modifyCommonState $ \st -> st{ stVerbosity = verbosity }
+
+getLog :: PandocMonad m => m [LogMessage]
+getLog = reverse <$> getsCommonState stLog
+
+report :: PandocMonad m => LogMessage -> m ()
+report msg = do
+ verbosity <- getsCommonState stVerbosity
+ let level = messageVerbosity msg
+ when (level <= verbosity) $ do
+ logOutput msg
+ unless (level == DEBUG) $
+ modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+setMediaBag :: PandocMonad m => MediaBag -> m ()
+setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
+
+getMediaBag :: PandocMonad m => m MediaBag
+getMediaBag = getsCommonState stMediaBag
+
+insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
+insertMedia fp mime bs = do
+ mb <- getsCommonState stMediaBag
+ let mb' = MB.insertMedia fp mime bs mb
+ modifyCommonState $ \st -> st{stMediaBag = mb' }
+
+getInputFiles :: PandocMonad m => m (Maybe [FilePath])
+getInputFiles = getsCommonState stInputFiles
+
+getOutputFile :: PandocMonad m => m (Maybe FilePath)
+getOutputFile = getsCommonState stOutputFile
+
+getPOSIXTime :: (PandocMonad m) => m POSIXTime
+getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime
+
+getZonedTime :: (PandocMonad m) => m ZonedTime
+getZonedTime = do
+ t <- getCurrentTime
+ tz <- getCurrentTimeZone
+ return $ utcToZonedTime tz t
+
+-- | Read file, checking in any number of directories.
+readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String)
+readFileFromDirs [] _ = return Nothing
+readFileFromDirs (d:ds) f = catchError
+ ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f))
+ (\_ -> readFileFromDirs ds f)
+
+--
+
+data CommonState = CommonState { stLog :: [LogMessage]
+ , stMediaBag :: MediaBag
+ , stInputFiles :: Maybe [FilePath]
+ , stOutputFile :: Maybe FilePath
+ , stVerbosity :: Verbosity
+ }
+
+instance Default CommonState where
+ def = CommonState { stLog = []
+ , stMediaBag = mempty
+ , stInputFiles = Nothing
+ , stOutputFile = Nothing
+ , stVerbosity = WARNING
+ }
+
+runIO :: PandocIO a -> IO (Either PandocError a)
+runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
+
+withMediaBag :: PandocMonad m => m a -> m (a, MediaBag)
+withMediaBag ma = ((,)) <$> ma <*> getMediaBag
+
+runIOorExplode :: PandocIO a -> IO a
+runIOorExplode ma = runIO ma >>= handleError
+
+newtype PandocIO a = PandocIO {
+ unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
+ } deriving ( MonadIO
+ , Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+instance PandocMonad PandocIO where
+ lookupEnv = liftIO . IO.lookupEnv
+ getCurrentTime = liftIO IO.getCurrentTime
+ getCurrentTimeZone = liftIO IO.getCurrentTimeZone
+ newStdGen = liftIO IO.newStdGen
+ newUniqueHash = hashUnique <$> (liftIO IO.newUnique)
+ openURL u = do
+ eitherRes <- liftIO $ (tryIOError $ IO.openURL u)
+ case eitherRes of
+ Right (Right res) -> return res
+ Right (Left _) -> throwError $ PandocFileReadError u
+ Left _ -> throwError $ PandocFileReadError u
+ readFileLazy s = do
+ eitherBS <- liftIO (tryIOError $ BL.readFile s)
+ case eitherBS of
+ Right bs -> return bs
+ Left _ -> throwError $ PandocFileReadError s
+ readFileStrict s = do
+ eitherBS <- liftIO (tryIOError $ B.readFile s)
+ case eitherBS of
+ Right bs -> return bs
+ Left _ -> throwError $ PandocFileReadError s
+ -- TODO: Make this more sensitive to the different sorts of failure
+ readDataFile mfp fname = do
+ eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname)
+ case eitherBS of
+ Right bs -> return bs
+ Left _ -> throwError $ PandocFileReadError fname
+ glob = liftIO . IO.glob
+ getModificationTime fp = do
+ eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp)
+ case eitherMtime of
+ Right mtime -> return mtime
+ Left _ -> throwError $ PandocFileReadError fp
+ getCommonState = PandocIO $ lift get
+ putCommonState x = PandocIO $ lift $ put x
+ logOutput msg =
+ liftIO $ UTF8.hPutStrLn stderr $ printf "%-7s %s"
+ (show (messageVerbosity msg)) (showLogMessage msg)
+
+-- | Specialized version of parseURIReference that disallows
+-- single-letter schemes. Reason: these are usually windows absolute
+-- paths.
+parseURIReference' :: String -> Maybe URI
+parseURIReference' s =
+ case parseURIReference s of
+ Just u
+ | length (uriScheme u) > 2 -> Just u
+ | null (uriScheme u) -> Just u -- protocol-relative
+ _ -> Nothing
+
+-- | Fetch an image or other item from the local filesystem or the net.
+-- Returns raw content and maybe mime type.
+fetchItem :: PandocMonad m
+ => Maybe String
+ -> String
+ -> m (B.ByteString, Maybe MimeType)
+fetchItem sourceURL s = do
+ mediabag <- getMediaBag
+ case lookupMedia s mediabag of
+ Just (mime, bs) -> return $ (BL.toStrict bs, Just mime)
+ Nothing -> downloadOrRead sourceURL s
+
+downloadOrRead :: PandocMonad m
+ => Maybe String
+ -> String
+ -> m (B.ByteString, Maybe MimeType)
+downloadOrRead sourceURL s = do
+ case (sourceURL >>= parseURIReference' .
+ ensureEscaped, ensureEscaped s) of
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference' s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI
+ case parseURIReference' s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
+ Nothing -> openURL s' -- will throw error
+ (Nothing, s') ->
+ case parseURI s' of -- requires absolute URI
+ -- We don't want to treat C:/ as a scheme:
+ Just u' | length (uriScheme u') > 2 -> openURL (show u')
+ Just u' | uriScheme u' == "file:" ->
+ readLocalFile $ dropWhile (=='/') (uriPath u')
+ _ -> readLocalFile fp -- get from local file system
+ where readLocalFile f = do
+ cont <- readFileStrict f
+ return (cont, mime)
+ httpcolon = URI{ uriScheme = "http:",
+ uriAuthority = Nothing,
+ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ dropFragmentAndQuery s
+ mime = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
+ x -> getMimeType x
+ ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
+ convertSlash '\\' = '/'
+ convertSlash x = x
+
+data PureState = PureState { stStdGen :: StdGen
+ , stWord8Store :: [Word8] -- should be
+ -- inifinite,
+ -- i.e. [1..]
+ , stUniqStore :: [Int] -- should be
+ -- inifinite and
+ -- contain every
+ -- element at most
+ -- once, e.g. [1..]
+ , stEnv :: [(String, String)]
+ , stTime :: UTCTime
+ , stTimeZone :: TimeZone
+ , stReferenceDocx :: Archive
+ , stReferenceODT :: Archive
+ , stFiles :: FileTree
+ , stUserDataDir :: FileTree
+ , stCabalDataDir :: FileTree
+ , stFontFiles :: [FilePath]
+ }
+
+instance Default PureState where
+ def = PureState { stStdGen = mkStdGen 1848
+ , stWord8Store = [1..]
+ , stUniqStore = [1..]
+ , stEnv = [("USER", "pandoc-user")]
+ , stTime = posixSecondsToUTCTime 0
+ , stTimeZone = utc
+ , stReferenceDocx = emptyArchive
+ , stReferenceODT = emptyArchive
+ , stFiles = mempty
+ , stUserDataDir = mempty
+ , stCabalDataDir = mempty
+ , stFontFiles = []
+ }
+
+
+getPureState :: PandocPure PureState
+getPureState = PandocPure $ lift $ lift $ get
+
+getsPureState :: (PureState -> a) -> PandocPure a
+getsPureState f = f <$> getPureState
+
+putPureState :: PureState -> PandocPure ()
+putPureState ps= PandocPure $ lift $ lift $ put ps
+
+modifyPureState :: (PureState -> PureState) -> PandocPure ()
+modifyPureState f = PandocPure $ lift $ lift $ modify f
+
+
+data FileInfo = FileInfo { infoFileMTime :: UTCTime
+ , infoFileContents :: B.ByteString
+ }
+
+newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
+ deriving (Monoid)
+
+getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
+getFileInfo fp tree = M.lookup fp $ unFileTree tree
+
+
+newtype PandocPure a = PandocPure {
+ unPandocPure :: ExceptT PandocError
+ (StateT CommonState (State PureState)) a
+ } deriving ( Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+runPure :: PandocPure a -> Either PandocError a
+runPure x = flip evalState def $
+ flip evalStateT def $
+ runExceptT $
+ unPandocPure x
+
+instance PandocMonad PandocPure where
+ lookupEnv s = do
+ env <- getsPureState stEnv
+ return (lookup s env)
+
+ getCurrentTime = getsPureState stTime
+
+ getCurrentTimeZone = getsPureState stTimeZone
+
+ newStdGen = do
+ g <- getsPureState stStdGen
+ let (_, nxtGen) = next g
+ modifyPureState $ \st -> st { stStdGen = nxtGen }
+ return g
+
+ newUniqueHash = do
+ uniqs <- getsPureState stUniqStore
+ case uniqs of
+ u : us -> do
+ modifyPureState $ \st -> st { stUniqStore = us }
+ return u
+ _ -> M.fail "uniq store ran out of elements"
+ openURL _ = throwError $ PandocSomeError "Cannot open URL in PandocPure"
+ readFileLazy fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return (BL.fromStrict bs)
+ Nothing -> throwError $ PandocFileReadError fp
+ readFileStrict fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return bs
+ Nothing -> throwError $ PandocFileReadError fp
+ readDataFile Nothing "reference.docx" = do
+ (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
+ readDataFile Nothing "reference.odt" = do
+ (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
+ readDataFile Nothing fname = do
+ let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+ readFileStrict fname'
+ readDataFile (Just userDir) fname = do
+ userDirFiles <- getsPureState stUserDataDir
+ case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of
+ Just bs -> return bs
+ Nothing -> readDataFile Nothing fname
+
+ glob s = do
+ fontFiles <- getsPureState stFontFiles
+ return (filter (match (compile s)) fontFiles)
+
+ getModificationTime fp = do
+ fps <- getsPureState stFiles
+ case infoFileMTime <$> (getFileInfo fp fps) of
+ Just tm -> return tm
+ Nothing -> throwError $ PandocFileReadError fp
+
+ getCommonState = PandocPure $ lift $ get
+ putCommonState x = PandocPure $ lift $ put x
+
+ logOutput _msg = return ()
+
+instance PandocMonad m => PandocMonad (ParsecT s st m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ readDataFile mbuserdir = lift . readDataFile mbuserdir
+ glob = lift . glob
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+instance PandocMonad m => PandocMonad (ReaderT r m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ readDataFile mbuserdir = lift . readDataFile mbuserdir
+ glob = lift . glob
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ readDataFile mbuserdir = lift . readDataFile mbuserdir
+ glob = lift . glob
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ readDataFile mbuserdir = lift . readDataFile mbuserdir
+ glob = lift . glob
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
+instance PandocMonad m => PandocMonad (StateT st m) where
+ lookupEnv = lift . lookupEnv
+ getCurrentTime = lift getCurrentTime
+ getCurrentTimeZone = lift getCurrentTimeZone
+ newStdGen = lift newStdGen
+ newUniqueHash = lift newUniqueHash
+ openURL = lift . openURL
+ readFileLazy = lift . readFileLazy
+ readFileStrict = lift . readFileStrict
+ readDataFile mbuserdir = lift . readDataFile mbuserdir
+ glob = lift . glob
+ getModificationTime = lift . getModificationTime
+ getCommonState = lift getCommonState
+ putCommonState = lift . putCommonState
+ logOutput = lift . logOutput
+
diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs
new file mode 100644
index 000000000..b1cde82a4
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Time.hs
@@ -0,0 +1,30 @@
+{-# LANGUAGE CPP #-}
+
+{-
+This compatibility module is needed because, in time 1.5, the
+`defaultTimeLocale` function was moved from System.Locale (in the
+old-locale library) into Data.Time.
+
+We support both behaviors because time 1.4 is a boot library for GHC
+7.8. time 1.5 is a boot library for GHC 7.10.
+
+When support is dropped for GHC 7.8, this module may be obsoleted.
+-}
+
+#if MIN_VERSION_time(1,5,0)
+module Text.Pandoc.Compat.Time (
+ module Data.Time
+)
+where
+import Data.Time
+
+#else
+module Text.Pandoc.Compat.Time (
+ module Data.Time,
+ defaultTimeLocale
+)
+where
+import Data.Time
+import System.Locale ( defaultTimeLocale )
+
+#endif
diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hsb
new file mode 100644
index 000000000..8786647c5
--- /dev/null
+++ b/src/Text/Pandoc/Data.hsb
@@ -0,0 +1,15 @@
+{-# LANGUAGE OverloadedStrings #-}
+-- to be processed using hsb2hs
+module Text.Pandoc.Data (dataFiles) where
+import qualified Data.ByteString as B
+import System.FilePath (splitDirectories)
+import qualified System.FilePath.Posix as Posix
+
+-- We ensure that the data files are stored using Posix
+-- path separators (/), even on Windows.
+dataFiles :: [(FilePath, B.ByteString)]
+dataFiles = map (\(fp, contents) ->
+ (Posix.joinPath (splitDirectories fp), contents)) dataFiles'
+
+dataFiles' :: [(FilePath, B.ByteString)]
+dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data"
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs
new file mode 100644
index 000000000..c9f368abc
--- /dev/null
+++ b/src/Text/Pandoc/Emoji.hs
@@ -0,0 +1,906 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Emoji
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Emoji symbol lookup from canonical string identifier.
+-}
+module Text.Pandoc.Emoji ( emojis ) where
+import qualified Data.Map as M
+
+emojis :: M.Map String String
+emojis = M.fromList
+ [("+1","\128077")
+ ,("-1","\128078")
+ ,("100","\128175")
+ ,("1234","\128290")
+ ,("8ball","\127921")
+ ,("a","\127344\65039")
+ ,("ab","\127374")
+ ,("abc","\128292")
+ ,("abcd","\128289")
+ ,("accept","\127569")
+ ,("aerial_tramway","\128673")
+ ,("airplane","\9992\65039")
+ ,("alarm_clock","\9200")
+ ,("alien","\128125")
+ ,("ambulance","\128657")
+ ,("anchor","\9875")
+ ,("angel","\128124")
+ ,("anger","\128162")
+ ,("angry","\128544")
+ ,("anguished","\128551")
+ ,("ant","\128028")
+ ,("apple","\127822")
+ ,("aquarius","\9810")
+ ,("aries","\9800")
+ ,("arrow_backward","\9664\65039")
+ ,("arrow_double_down","\9196")
+ ,("arrow_double_up","\9195")
+ ,("arrow_down","\11015\65039")
+ ,("arrow_down_small","\128317")
+ ,("arrow_forward","\9654\65039")
+ ,("arrow_heading_down","\10549\65039")
+ ,("arrow_heading_up","\10548\65039")
+ ,("arrow_left","\11013\65039")
+ ,("arrow_lower_left","\8601\65039")
+ ,("arrow_lower_right","\8600\65039")
+ ,("arrow_right","\10145\65039")
+ ,("arrow_right_hook","\8618\65039")
+ ,("arrow_up","\11014\65039")
+ ,("arrow_up_down","\8597\65039")
+ ,("arrow_up_small","\128316")
+ ,("arrow_upper_left","\8598\65039")
+ ,("arrow_upper_right","\8599\65039")
+ ,("arrows_clockwise","\128259")
+ ,("arrows_counterclockwise","\128260")
+ ,("art","\127912")
+ ,("articulated_lorry","\128667")
+ ,("astonished","\128562")
+ ,("athletic_shoe","\128095")
+ ,("atm","\127975")
+ ,("b","\127345\65039")
+ ,("baby","\128118")
+ ,("baby_bottle","\127868")
+ ,("baby_chick","\128036")
+ ,("baby_symbol","\128700")
+ ,("back","\128281")
+ ,("baggage_claim","\128708")
+ ,("balloon","\127880")
+ ,("ballot_box_with_check","\9745\65039")
+ ,("bamboo","\127885")
+ ,("banana","\127820")
+ ,("bangbang","\8252\65039")
+ ,("bank","\127974")
+ ,("bar_chart","\128202")
+ ,("barber","\128136")
+ ,("baseball","\9918\65039")
+ ,("basketball","\127936")
+ ,("bath","\128704")
+ ,("bathtub","\128705")
+ ,("battery","\128267")
+ ,("bear","\128059")
+ ,("bee","\128029")
+ ,("beer","\127866")
+ ,("beers","\127867")
+ ,("beetle","\128030")
+ ,("beginner","\128304")
+ ,("bell","\128276")
+ ,("bento","\127857")
+ ,("bicyclist","\128692")
+ ,("bike","\128690")
+ ,("bikini","\128089")
+ ,("bird","\128038")
+ ,("birthday","\127874")
+ ,("black_circle","\9899")
+ ,("black_joker","\127183")
+ ,("black_large_square","\11035")
+ ,("black_medium_small_square","\9726")
+ ,("black_medium_square","\9724\65039")
+ ,("black_nib","\10002\65039")
+ ,("black_small_square","\9642\65039")
+ ,("black_square_button","\128306")
+ ,("blossom","\127804")
+ ,("blowfish","\128033")
+ ,("blue_book","\128216")
+ ,("blue_car","\128665")
+ ,("blue_heart","\128153")
+ ,("blush","\128522")
+ ,("boar","\128023")
+ ,("boat","\9973")
+ ,("bomb","\128163")
+ ,("book","\128214")
+ ,("bookmark","\128278")
+ ,("bookmark_tabs","\128209")
+ ,("books","\128218")
+ ,("boom","\128165")
+ ,("boot","\128098")
+ ,("bouquet","\128144")
+ ,("bow","\128583")
+ ,("bowling","\127923")
+ ,("boy","\128102")
+ ,("bread","\127838")
+ ,("bride_with_veil","\128112")
+ ,("bridge_at_night","\127753")
+ ,("briefcase","\128188")
+ ,("broken_heart","\128148")
+ ,("bug","\128027")
+ ,("bulb","\128161")
+ ,("bullettrain_front","\128645")
+ ,("bullettrain_side","\128644")
+ ,("bus","\128652")
+ ,("busstop","\128655")
+ ,("bust_in_silhouette","\128100")
+ ,("busts_in_silhouette","\128101")
+ ,("cactus","\127797")
+ ,("cake","\127856")
+ ,("calendar","\128198")
+ ,("calling","\128242")
+ ,("camel","\128043")
+ ,("camera","\128247")
+ ,("cancer","\9803")
+ ,("candy","\127852")
+ ,("capital_abcd","\128288")
+ ,("capricorn","\9809")
+ ,("car","\128663")
+ ,("card_index","\128199")
+ ,("carousel_horse","\127904")
+ ,("cat","\128049")
+ ,("cat2","\128008")
+ ,("cd","\128191")
+ ,("chart","\128185")
+ ,("chart_with_downwards_trend","\128201")
+ ,("chart_with_upwards_trend","\128200")
+ ,("checkered_flag","\127937")
+ ,("cherries","\127826")
+ ,("cherry_blossom","\127800")
+ ,("chestnut","\127792")
+ ,("chicken","\128020")
+ ,("children_crossing","\128696")
+ ,("chocolate_bar","\127851")
+ ,("christmas_tree","\127876")
+ ,("church","\9962")
+ ,("cinema","\127910")
+ ,("circus_tent","\127914")
+ ,("city_sunrise","\127751")
+ ,("city_sunset","\127750")
+ ,("cl","\127377")
+ ,("clap","\128079")
+ ,("clapper","\127916")
+ ,("clipboard","\128203")
+ ,("clock1","\128336")
+ ,("clock10","\128345")
+ ,("clock1030","\128357")
+ ,("clock11","\128346")
+ ,("clock1130","\128358")
+ ,("clock12","\128347")
+ ,("clock1230","\128359")
+ ,("clock130","\128348")
+ ,("clock2","\128337")
+ ,("clock230","\128349")
+ ,("clock3","\128338")
+ ,("clock330","\128350")
+ ,("clock4","\128339")
+ ,("clock430","\128351")
+ ,("clock5","\128340")
+ ,("clock530","\128352")
+ ,("clock6","\128341")
+ ,("clock630","\128353")
+ ,("clock7","\128342")
+ ,("clock730","\128354")
+ ,("clock8","\128343")
+ ,("clock830","\128355")
+ ,("clock9","\128344")
+ ,("clock930","\128356")
+ ,("closed_book","\128213")
+ ,("closed_lock_with_key","\128272")
+ ,("closed_umbrella","\127746")
+ ,("cloud","\9729\65039")
+ ,("clubs","\9827\65039")
+ ,("cn","\127464\127475")
+ ,("cocktail","\127864")
+ ,("coffee","\9749")
+ ,("cold_sweat","\128560")
+ ,("collision","\128165")
+ ,("computer","\128187")
+ ,("confetti_ball","\127882")
+ ,("confounded","\128534")
+ ,("confused","\128533")
+ ,("congratulations","\12951\65039")
+ ,("construction","\128679")
+ ,("construction_worker","\128119")
+ ,("convenience_store","\127978")
+ ,("cookie","\127850")
+ ,("cool","\127378")
+ ,("cop","\128110")
+ ,("copyright","\169\65039")
+ ,("corn","\127805")
+ ,("couple","\128107")
+ ,("couple_with_heart","\128145")
+ ,("couplekiss","\128143")
+ ,("cow","\128046")
+ ,("cow2","\128004")
+ ,("credit_card","\128179")
+ ,("crescent_moon","\127769")
+ ,("crocodile","\128010")
+ ,("crossed_flags","\127884")
+ ,("crown","\128081")
+ ,("cry","\128546")
+ ,("crying_cat_face","\128575")
+ ,("crystal_ball","\128302")
+ ,("cupid","\128152")
+ ,("curly_loop","\10160")
+ ,("currency_exchange","\128177")
+ ,("curry","\127835")
+ ,("custard","\127854")
+ ,("customs","\128707")
+ ,("cyclone","\127744")
+ ,("dancer","\128131")
+ ,("dancers","\128111")
+ ,("dango","\127841")
+ ,("dart","\127919")
+ ,("dash","\128168")
+ ,("date","\128197")
+ ,("de","\127465\127466")
+ ,("deciduous_tree","\127795")
+ ,("department_store","\127980")
+ ,("diamond_shape_with_a_dot_inside","\128160")
+ ,("diamonds","\9830\65039")
+ ,("disappointed","\128542")
+ ,("disappointed_relieved","\128549")
+ ,("dizzy","\128171")
+ ,("dizzy_face","\128565")
+ ,("do_not_litter","\128687")
+ ,("dog","\128054")
+ ,("dog2","\128021")
+ ,("dollar","\128181")
+ ,("dolls","\127886")
+ ,("dolphin","\128044")
+ ,("door","\128682")
+ ,("doughnut","\127849")
+ ,("dragon","\128009")
+ ,("dragon_face","\128050")
+ ,("dress","\128087")
+ ,("dromedary_camel","\128042")
+ ,("droplet","\128167")
+ ,("dvd","\128192")
+ ,("e-mail","\128231")
+ ,("ear","\128066")
+ ,("ear_of_rice","\127806")
+ ,("earth_africa","\127757")
+ ,("earth_americas","\127758")
+ ,("earth_asia","\127759")
+ ,("egg","\127859")
+ ,("eggplant","\127814")
+ ,("eight","8\65039\8419")
+ ,("eight_pointed_black_star","\10036\65039")
+ ,("eight_spoked_asterisk","\10035\65039")
+ ,("electric_plug","\128268")
+ ,("elephant","\128024")
+ ,("email","\9993\65039")
+ ,("end","\128282")
+ ,("envelope","\9993\65039")
+ ,("envelope_with_arrow","\128233")
+ ,("es","\127466\127480")
+ ,("euro","\128182")
+ ,("european_castle","\127984")
+ ,("european_post_office","\127972")
+ ,("evergreen_tree","\127794")
+ ,("exclamation","\10071")
+ ,("expressionless","\128529")
+ ,("eyeglasses","\128083")
+ ,("eyes","\128064")
+ ,("facepunch","\128074")
+ ,("factory","\127981")
+ ,("fallen_leaf","\127810")
+ ,("family","\128106")
+ ,("fast_forward","\9193")
+ ,("fax","\128224")
+ ,("fearful","\128552")
+ ,("feet","\128062")
+ ,("ferris_wheel","\127905")
+ ,("file_folder","\128193")
+ ,("fire","\128293")
+ ,("fire_engine","\128658")
+ ,("fireworks","\127878")
+ ,("first_quarter_moon","\127763")
+ ,("first_quarter_moon_with_face","\127771")
+ ,("fish","\128031")
+ ,("fish_cake","\127845")
+ ,("fishing_pole_and_fish","\127907")
+ ,("fist","\9994")
+ ,("five","5\65039\8419")
+ ,("flags","\127887")
+ ,("flashlight","\128294")
+ ,("flipper","\128044")
+ ,("floppy_disk","\128190")
+ ,("flower_playing_cards","\127924")
+ ,("flushed","\128563")
+ ,("foggy","\127745")
+ ,("football","\127944")
+ ,("footprints","\128099")
+ ,("fork_and_knife","\127860")
+ ,("fountain","\9970")
+ ,("four","4\65039\8419")
+ ,("four_leaf_clover","\127808")
+ ,("fr","\127467\127479")
+ ,("free","\127379")
+ ,("fried_shrimp","\127844")
+ ,("fries","\127839")
+ ,("frog","\128056")
+ ,("frowning","\128550")
+ ,("fuelpump","\9981")
+ ,("full_moon","\127765")
+ ,("full_moon_with_face","\127773")
+ ,("game_die","\127922")
+ ,("gb","\127468\127463")
+ ,("gem","\128142")
+ ,("gemini","\9802")
+ ,("ghost","\128123")
+ ,("gift","\127873")
+ ,("gift_heart","\128157")
+ ,("girl","\128103")
+ ,("globe_with_meridians","\127760")
+ ,("goat","\128016")
+ ,("golf","\9971")
+ ,("grapes","\127815")
+ ,("green_apple","\127823")
+ ,("green_book","\128215")
+ ,("green_heart","\128154")
+ ,("grey_exclamation","\10069")
+ ,("grey_question","\10068")
+ ,("grimacing","\128556")
+ ,("grin","\128513")
+ ,("grinning","\128512")
+ ,("guardsman","\128130")
+ ,("guitar","\127928")
+ ,("gun","\128299")
+ ,("haircut","\128135")
+ ,("hamburger","\127828")
+ ,("hammer","\128296")
+ ,("hamster","\128057")
+ ,("hand","\9995")
+ ,("handbag","\128092")
+ ,("hankey","\128169")
+ ,("hash","#\65039\8419")
+ ,("hatched_chick","\128037")
+ ,("hatching_chick","\128035")
+ ,("headphones","\127911")
+ ,("hear_no_evil","\128585")
+ ,("heart","\10084\65039")
+ ,("heart_decoration","\128159")
+ ,("heart_eyes","\128525")
+ ,("heart_eyes_cat","\128571")
+ ,("heartbeat","\128147")
+ ,("heartpulse","\128151")
+ ,("hearts","\9829\65039")
+ ,("heavy_check_mark","\10004\65039")
+ ,("heavy_division_sign","\10135")
+ ,("heavy_dollar_sign","\128178")
+ ,("heavy_exclamation_mark","\10071")
+ ,("heavy_minus_sign","\10134")
+ ,("heavy_multiplication_x","\10006\65039")
+ ,("heavy_plus_sign","\10133")
+ ,("helicopter","\128641")
+ ,("herb","\127807")
+ ,("hibiscus","\127802")
+ ,("high_brightness","\128262")
+ ,("high_heel","\128096")
+ ,("hocho","\128298")
+ ,("honey_pot","\127855")
+ ,("honeybee","\128029")
+ ,("horse","\128052")
+ ,("horse_racing","\127943")
+ ,("hospital","\127973")
+ ,("hotel","\127976")
+ ,("hotsprings","\9832\65039")
+ ,("hourglass","\8987")
+ ,("hourglass_flowing_sand","\9203")
+ ,("house","\127968")
+ ,("house_with_garden","\127969")
+ ,("hushed","\128559")
+ ,("ice_cream","\127848")
+ ,("icecream","\127846")
+ ,("id","\127380")
+ ,("ideograph_advantage","\127568")
+ ,("imp","\128127")
+ ,("inbox_tray","\128229")
+ ,("incoming_envelope","\128232")
+ ,("information_desk_person","\128129")
+ ,("information_source","\8505\65039")
+ ,("innocent","\128519")
+ ,("interrobang","\8265\65039")
+ ,("iphone","\128241")
+ ,("it","\127470\127481")
+ ,("izakaya_lantern","\127982")
+ ,("jack_o_lantern","\127875")
+ ,("japan","\128510")
+ ,("japanese_castle","\127983")
+ ,("japanese_goblin","\128122")
+ ,("japanese_ogre","\128121")
+ ,("jeans","\128086")
+ ,("joy","\128514")
+ ,("joy_cat","\128569")
+ ,("jp","\127471\127477")
+ ,("key","\128273")
+ ,("keycap_ten","\128287")
+ ,("kimono","\128088")
+ ,("kiss","\128139")
+ ,("kissing","\128535")
+ ,("kissing_cat","\128573")
+ ,("kissing_closed_eyes","\128538")
+ ,("kissing_heart","\128536")
+ ,("kissing_smiling_eyes","\128537")
+ ,("knife","\128298")
+ ,("koala","\128040")
+ ,("koko","\127489")
+ ,("kr","\127472\127479")
+ ,("lantern","\127982")
+ ,("large_blue_circle","\128309")
+ ,("large_blue_diamond","\128311")
+ ,("large_orange_diamond","\128310")
+ ,("last_quarter_moon","\127767")
+ ,("last_quarter_moon_with_face","\127772")
+ ,("laughing","\128518")
+ ,("leaves","\127811")
+ ,("ledger","\128210")
+ ,("left_luggage","\128709")
+ ,("left_right_arrow","\8596\65039")
+ ,("leftwards_arrow_with_hook","\8617\65039")
+ ,("lemon","\127819")
+ ,("leo","\9804")
+ ,("leopard","\128006")
+ ,("libra","\9806")
+ ,("light_rail","\128648")
+ ,("link","\128279")
+ ,("lips","\128068")
+ ,("lipstick","\128132")
+ ,("lock","\128274")
+ ,("lock_with_ink_pen","\128271")
+ ,("lollipop","\127853")
+ ,("loop","\10175")
+ ,("loud_sound","\128266")
+ ,("loudspeaker","\128226")
+ ,("love_hotel","\127977")
+ ,("love_letter","\128140")
+ ,("low_brightness","\128261")
+ ,("m","\9410\65039")
+ ,("mag","\128269")
+ ,("mag_right","\128270")
+ ,("mahjong","\126980")
+ ,("mailbox","\128235")
+ ,("mailbox_closed","\128234")
+ ,("mailbox_with_mail","\128236")
+ ,("mailbox_with_no_mail","\128237")
+ ,("man","\128104")
+ ,("man_with_gua_pi_mao","\128114")
+ ,("man_with_turban","\128115")
+ ,("mans_shoe","\128094")
+ ,("maple_leaf","\127809")
+ ,("mask","\128567")
+ ,("massage","\128134")
+ ,("meat_on_bone","\127830")
+ ,("mega","\128227")
+ ,("melon","\127816")
+ ,("memo","\128221")
+ ,("mens","\128697")
+ ,("metro","\128647")
+ ,("microphone","\127908")
+ ,("microscope","\128300")
+ ,("milky_way","\127756")
+ ,("minibus","\128656")
+ ,("minidisc","\128189")
+ ,("mobile_phone_off","\128244")
+ ,("money_with_wings","\128184")
+ ,("moneybag","\128176")
+ ,("monkey","\128018")
+ ,("monkey_face","\128053")
+ ,("monorail","\128669")
+ ,("moon","\127764")
+ ,("mortar_board","\127891")
+ ,("mount_fuji","\128507")
+ ,("mountain_bicyclist","\128693")
+ ,("mountain_cableway","\128672")
+ ,("mountain_railway","\128670")
+ ,("mouse","\128045")
+ ,("mouse2","\128001")
+ ,("movie_camera","\127909")
+ ,("moyai","\128511")
+ ,("muscle","\128170")
+ ,("mushroom","\127812")
+ ,("musical_keyboard","\127929")
+ ,("musical_note","\127925")
+ ,("musical_score","\127932")
+ ,("mute","\128263")
+ ,("nail_care","\128133")
+ ,("name_badge","\128219")
+ ,("necktie","\128084")
+ ,("negative_squared_cross_mark","\10062")
+ ,("neutral_face","\128528")
+ ,("new","\127381")
+ ,("new_moon","\127761")
+ ,("new_moon_with_face","\127770")
+ ,("newspaper","\128240")
+ ,("ng","\127382")
+ ,("night_with_stars","\127747")
+ ,("nine","9\65039\8419")
+ ,("no_bell","\128277")
+ ,("no_bicycles","\128691")
+ ,("no_entry","\9940")
+ ,("no_entry_sign","\128683")
+ ,("no_good","\128581")
+ ,("no_mobile_phones","\128245")
+ ,("no_mouth","\128566")
+ ,("no_pedestrians","\128695")
+ ,("no_smoking","\128685")
+ ,("non-potable_water","\128689")
+ ,("nose","\128067")
+ ,("notebook","\128211")
+ ,("notebook_with_decorative_cover","\128212")
+ ,("notes","\127926")
+ ,("nut_and_bolt","\128297")
+ ,("o","\11093")
+ ,("o2","\127358\65039")
+ ,("ocean","\127754")
+ ,("octopus","\128025")
+ ,("oden","\127842")
+ ,("office","\127970")
+ ,("ok","\127383")
+ ,("ok_hand","\128076")
+ ,("ok_woman","\128582")
+ ,("older_man","\128116")
+ ,("older_woman","\128117")
+ ,("on","\128283")
+ ,("oncoming_automobile","\128664")
+ ,("oncoming_bus","\128653")
+ ,("oncoming_police_car","\128660")
+ ,("oncoming_taxi","\128662")
+ ,("one","1\65039\8419")
+ ,("open_book","\128214")
+ ,("open_file_folder","\128194")
+ ,("open_hands","\128080")
+ ,("open_mouth","\128558")
+ ,("ophiuchus","\9934")
+ ,("orange_book","\128217")
+ ,("outbox_tray","\128228")
+ ,("ox","\128002")
+ ,("package","\128230")
+ ,("page_facing_up","\128196")
+ ,("page_with_curl","\128195")
+ ,("pager","\128223")
+ ,("palm_tree","\127796")
+ ,("panda_face","\128060")
+ ,("paperclip","\128206")
+ ,("parking","\127359\65039")
+ ,("part_alternation_mark","\12349\65039")
+ ,("partly_sunny","\9925")
+ ,("passport_control","\128706")
+ ,("paw_prints","\128062")
+ ,("peach","\127825")
+ ,("pear","\127824")
+ ,("pencil","\128221")
+ ,("pencil2","\9999\65039")
+ ,("penguin","\128039")
+ ,("pensive","\128532")
+ ,("performing_arts","\127917")
+ ,("persevere","\128547")
+ ,("person_frowning","\128589")
+ ,("person_with_blond_hair","\128113")
+ ,("person_with_pouting_face","\128590")
+ ,("phone","\9742\65039")
+ ,("pig","\128055")
+ ,("pig2","\128022")
+ ,("pig_nose","\128061")
+ ,("pill","\128138")
+ ,("pineapple","\127821")
+ ,("pisces","\9811")
+ ,("pizza","\127829")
+ ,("point_down","\128071")
+ ,("point_left","\128072")
+ ,("point_right","\128073")
+ ,("point_up","\9757\65039")
+ ,("point_up_2","\128070")
+ ,("police_car","\128659")
+ ,("poodle","\128041")
+ ,("poop","\128169")
+ ,("post_office","\127971")
+ ,("postal_horn","\128239")
+ ,("postbox","\128238")
+ ,("potable_water","\128688")
+ ,("pouch","\128093")
+ ,("poultry_leg","\127831")
+ ,("pound","\128183")
+ ,("pouting_cat","\128574")
+ ,("pray","\128591")
+ ,("princess","\128120")
+ ,("punch","\128074")
+ ,("purple_heart","\128156")
+ ,("purse","\128091")
+ ,("pushpin","\128204")
+ ,("put_litter_in_its_place","\128686")
+ ,("question","\10067")
+ ,("rabbit","\128048")
+ ,("rabbit2","\128007")
+ ,("racehorse","\128014")
+ ,("radio","\128251")
+ ,("radio_button","\128280")
+ ,("rage","\128545")
+ ,("railway_car","\128643")
+ ,("rainbow","\127752")
+ ,("raised_hand","\9995")
+ ,("raised_hands","\128588")
+ ,("raising_hand","\128587")
+ ,("ram","\128015")
+ ,("ramen","\127836")
+ ,("rat","\128000")
+ ,("recycle","\9851\65039")
+ ,("red_car","\128663")
+ ,("red_circle","\128308")
+ ,("registered","\174\65039")
+ ,("relaxed","\9786\65039")
+ ,("relieved","\128524")
+ ,("repeat","\128257")
+ ,("repeat_one","\128258")
+ ,("restroom","\128699")
+ ,("revolving_hearts","\128158")
+ ,("rewind","\9194")
+ ,("ribbon","\127872")
+ ,("rice","\127834")
+ ,("rice_ball","\127833")
+ ,("rice_cracker","\127832")
+ ,("rice_scene","\127889")
+ ,("ring","\128141")
+ ,("rocket","\128640")
+ ,("roller_coaster","\127906")
+ ,("rooster","\128019")
+ ,("rose","\127801")
+ ,("rotating_light","\128680")
+ ,("round_pushpin","\128205")
+ ,("rowboat","\128675")
+ ,("ru","\127479\127482")
+ ,("rugby_football","\127945")
+ ,("runner","\127939")
+ ,("running","\127939")
+ ,("running_shirt_with_sash","\127933")
+ ,("sa","\127490\65039")
+ ,("sagittarius","\9808")
+ ,("sailboat","\9973")
+ ,("sake","\127862")
+ ,("sandal","\128097")
+ ,("santa","\127877")
+ ,("satellite","\128225")
+ ,("satisfied","\128518")
+ ,("saxophone","\127927")
+ ,("school","\127979")
+ ,("school_satchel","\127890")
+ ,("scissors","\9986\65039")
+ ,("scorpius","\9807")
+ ,("scream","\128561")
+ ,("scream_cat","\128576")
+ ,("scroll","\128220")
+ ,("seat","\128186")
+ ,("secret","\12953\65039")
+ ,("see_no_evil","\128584")
+ ,("seedling","\127793")
+ ,("seven","7\65039\8419")
+ ,("shaved_ice","\127847")
+ ,("sheep","\128017")
+ ,("shell","\128026")
+ ,("ship","\128674")
+ ,("shirt","\128085")
+ ,("shit","\128169")
+ ,("shoe","\128094")
+ ,("shower","\128703")
+ ,("signal_strength","\128246")
+ ,("six","6\65039\8419")
+ ,("six_pointed_star","\128303")
+ ,("ski","\127935")
+ ,("skull","\128128")
+ ,("sleeping","\128564")
+ ,("sleepy","\128554")
+ ,("slot_machine","\127920")
+ ,("small_blue_diamond","\128313")
+ ,("small_orange_diamond","\128312")
+ ,("small_red_triangle","\128314")
+ ,("small_red_triangle_down","\128315")
+ ,("smile","\128516")
+ ,("smile_cat","\128568")
+ ,("smiley","\128515")
+ ,("smiley_cat","\128570")
+ ,("smiling_imp","\128520")
+ ,("smirk","\128527")
+ ,("smirk_cat","\128572")
+ ,("smoking","\128684")
+ ,("snail","\128012")
+ ,("snake","\128013")
+ ,("snowboarder","\127938")
+ ,("snowflake","\10052\65039")
+ ,("snowman","\9924")
+ ,("sob","\128557")
+ ,("soccer","\9917")
+ ,("soon","\128284")
+ ,("sos","\127384")
+ ,("sound","\128265")
+ ,("space_invader","\128126")
+ ,("spades","\9824\65039")
+ ,("spaghetti","\127837")
+ ,("sparkle","\10055\65039")
+ ,("sparkler","\127879")
+ ,("sparkles","\10024")
+ ,("sparkling_heart","\128150")
+ ,("speak_no_evil","\128586")
+ ,("speaker","\128264")
+ ,("speech_balloon","\128172")
+ ,("speedboat","\128676")
+ ,("star","\11088")
+ ,("star2","\127775")
+ ,("stars","\127776")
+ ,("station","\128649")
+ ,("statue_of_liberty","\128509")
+ ,("steam_locomotive","\128642")
+ ,("stew","\127858")
+ ,("straight_ruler","\128207")
+ ,("strawberry","\127827")
+ ,("stuck_out_tongue","\128539")
+ ,("stuck_out_tongue_closed_eyes","\128541")
+ ,("stuck_out_tongue_winking_eye","\128540")
+ ,("sun_with_face","\127774")
+ ,("sunflower","\127803")
+ ,("sunglasses","\128526")
+ ,("sunny","\9728\65039")
+ ,("sunrise","\127749")
+ ,("sunrise_over_mountains","\127748")
+ ,("surfer","\127940")
+ ,("sushi","\127843")
+ ,("suspension_railway","\128671")
+ ,("sweat","\128531")
+ ,("sweat_drops","\128166")
+ ,("sweat_smile","\128517")
+ ,("sweet_potato","\127840")
+ ,("swimmer","\127946")
+ ,("symbols","\128291")
+ ,("syringe","\128137")
+ ,("tada","\127881")
+ ,("tanabata_tree","\127883")
+ ,("tangerine","\127818")
+ ,("taurus","\9801")
+ ,("taxi","\128661")
+ ,("tea","\127861")
+ ,("telephone","\9742\65039")
+ ,("telephone_receiver","\128222")
+ ,("telescope","\128301")
+ ,("tennis","\127934")
+ ,("tent","\9978")
+ ,("thought_balloon","\128173")
+ ,("three","3\65039\8419")
+ ,("thumbsdown","\128078")
+ ,("thumbsup","\128077")
+ ,("ticket","\127915")
+ ,("tiger","\128047")
+ ,("tiger2","\128005")
+ ,("tired_face","\128555")
+ ,("tm","\8482\65039")
+ ,("toilet","\128701")
+ ,("tokyo_tower","\128508")
+ ,("tomato","\127813")
+ ,("tongue","\128069")
+ ,("top","\128285")
+ ,("tophat","\127913")
+ ,("tractor","\128668")
+ ,("traffic_light","\128677")
+ ,("train","\128651")
+ ,("train2","\128646")
+ ,("tram","\128650")
+ ,("triangular_flag_on_post","\128681")
+ ,("triangular_ruler","\128208")
+ ,("trident","\128305")
+ ,("triumph","\128548")
+ ,("trolleybus","\128654")
+ ,("trophy","\127942")
+ ,("tropical_drink","\127865")
+ ,("tropical_fish","\128032")
+ ,("truck","\128666")
+ ,("trumpet","\127930")
+ ,("tshirt","\128085")
+ ,("tulip","\127799")
+ ,("turtle","\128034")
+ ,("tv","\128250")
+ ,("twisted_rightwards_arrows","\128256")
+ ,("two","2\65039\8419")
+ ,("two_hearts","\128149")
+ ,("two_men_holding_hands","\128108")
+ ,("two_women_holding_hands","\128109")
+ ,("u5272","\127545")
+ ,("u5408","\127540")
+ ,("u55b6","\127546")
+ ,("u6307","\127535")
+ ,("u6708","\127543\65039")
+ ,("u6709","\127542")
+ ,("u6e80","\127541")
+ ,("u7121","\127514")
+ ,("u7533","\127544")
+ ,("u7981","\127538")
+ ,("u7a7a","\127539")
+ ,("uk","\127468\127463")
+ ,("umbrella","\9748")
+ ,("unamused","\128530")
+ ,("underage","\128286")
+ ,("unlock","\128275")
+ ,("up","\127385")
+ ,("us","\127482\127480")
+ ,("v","\9996\65039")
+ ,("vertical_traffic_light","\128678")
+ ,("vhs","\128252")
+ ,("vibration_mode","\128243")
+ ,("video_camera","\128249")
+ ,("video_game","\127918")
+ ,("violin","\127931")
+ ,("virgo","\9805")
+ ,("volcano","\127755")
+ ,("vs","\127386")
+ ,("walking","\128694")
+ ,("waning_crescent_moon","\127768")
+ ,("waning_gibbous_moon","\127766")
+ ,("warning","\9888\65039")
+ ,("watch","\8986")
+ ,("water_buffalo","\128003")
+ ,("watermelon","\127817")
+ ,("wave","\128075")
+ ,("wavy_dash","\12336\65039")
+ ,("waxing_crescent_moon","\127762")
+ ,("waxing_gibbous_moon","\127764")
+ ,("wc","\128702")
+ ,("weary","\128553")
+ ,("wedding","\128146")
+ ,("whale","\128051")
+ ,("whale2","\128011")
+ ,("wheelchair","\9855")
+ ,("white_check_mark","\9989")
+ ,("white_circle","\9898")
+ ,("white_flower","\128174")
+ ,("white_large_square","\11036")
+ ,("white_medium_small_square","\9725")
+ ,("white_medium_square","\9723\65039")
+ ,("white_small_square","\9643\65039")
+ ,("white_square_button","\128307")
+ ,("wind_chime","\127888")
+ ,("wine_glass","\127863")
+ ,("wink","\128521")
+ ,("wolf","\128058")
+ ,("woman","\128105")
+ ,("womans_clothes","\128090")
+ ,("womans_hat","\128082")
+ ,("womens","\128698")
+ ,("worried","\128543")
+ ,("wrench","\128295")
+ ,("x","\10060")
+ ,("yellow_heart","\128155")
+ ,("yen","\128180")
+ ,("yum","\128523")
+ ,("zap","\9889")
+ ,("zero","0\65039\8419")
+ ,("zzz","\128164")
+ ]
+
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
new file mode 100644
index 000000000..65f912c88
--- /dev/null
+++ b/src/Text/Pandoc/Error.hs
@@ -0,0 +1,76 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Error
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This module provides a standard way to deal with possible errors encounted
+during parsing.
+
+-}
+module Text.Pandoc.Error (
+ PandocError(..),
+ handleError) where
+
+import Text.Parsec.Error
+import Text.Parsec.Pos hiding (Line)
+import Data.Generics (Typeable)
+import GHC.Generics (Generic)
+import Control.Exception (Exception)
+import Text.Pandoc.Shared (err)
+
+type Input = String
+
+data PandocError = PandocFileReadError FilePath
+ | PandocShouldNeverHappenError String
+ | PandocSomeError String
+ | PandocParseError String
+ | PandocParsecError Input ParseError
+ | PandocMakePDFError String
+ deriving (Show, Typeable, Generic)
+
+instance Exception PandocError
+
+-- | Handle PandocError by exiting with an error message.
+handleError :: Either PandocError a -> IO a
+handleError (Right r) = return r
+handleError (Left e) =
+ case e of
+ PandocFileReadError fp -> err 61 $ "problem reading " ++ fp
+ PandocShouldNeverHappenError s -> err 62 s
+ PandocSomeError s -> err 63 s
+ PandocParseError s -> err 64 s
+ PandocParsecError input err' ->
+ let errPos = errorPos err'
+ errLine = sourceLine errPos
+ errColumn = sourceColumn errPos
+ ls = lines input ++ [""]
+ errorInFile = if length ls > errLine - 1
+ then concat ["\n", (ls !! (errLine - 1))
+ ,"\n", replicate (errColumn - 1) ' '
+ ,"^"]
+ else ""
+ in err 65 $ "\nError at " ++ show err' ++ errorInFile
+ PandocMakePDFError s -> err 65 s
+
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
new file mode 100644
index 000000000..d5e59e8e1
--- /dev/null
+++ b/src/Text/Pandoc/Extensions.hs
@@ -0,0 +1,267 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-
+Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Extensions
+ Copyright : Copyright (C) 2012-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data structures and functions for representing markup extensions.
+-}
+module Text.Pandoc.Extensions ( Extension(..)
+ , Extensions
+ , emptyExtensions
+ , extensionsFromList
+ , extensionEnabled
+ , enableExtension
+ , disableExtension
+ , pandocExtensions
+ , plainExtensions
+ , strictExtensions
+ , phpMarkdownExtraExtensions
+ , githubMarkdownExtensions
+ , multimarkdownExtensions )
+where
+import Data.Bits (testBit, setBit, clearBit)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+
+newtype Extensions = Extensions Integer
+ deriving (Show, Read, Eq, Ord, Data, Typeable, Generic)
+
+extensionsFromList :: [Extension] -> Extensions
+extensionsFromList = foldr enableExtension emptyExtensions
+
+emptyExtensions :: Extensions
+emptyExtensions = Extensions 0
+
+extensionEnabled :: Extension -> Extensions -> Bool
+extensionEnabled x (Extensions exts) = testBit exts (fromEnum x)
+
+enableExtension :: Extension -> Extensions -> Extensions
+enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x))
+
+disableExtension :: Extension -> Extensions -> Extensions
+disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x))
+
+-- | Individually selectable syntax extensions.
+data Extension =
+ Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
+ | Ext_inline_notes -- ^ Pandoc-style inline notes
+ | Ext_pandoc_title_block -- ^ Pandoc title block
+ | Ext_yaml_metadata_block -- ^ YAML metadata block
+ | Ext_mmd_title_block -- ^ Multimarkdown metadata block
+ | Ext_table_captions -- ^ Pandoc-style table captions
+ | Ext_implicit_figures -- ^ A paragraph with just an image is a figure
+ | Ext_simple_tables -- ^ Pandoc-style simple tables
+ | Ext_multiline_tables -- ^ Pandoc-style multiline tables
+ | Ext_grid_tables -- ^ Grid tables (pandoc, reST)
+ | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra)
+ | Ext_citations -- ^ Pandoc/citeproc citations
+ | Ext_raw_tex -- ^ Allow raw TeX (other than math)
+ | Ext_raw_html -- ^ Allow raw HTML
+ | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$
+ | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\]
+ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\]
+ | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
+ | Ext_fenced_code_blocks -- ^ Parse fenced code blocks
+ | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
+ | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
+ | Ext_inline_code_attributes -- ^ Allow attributes on inline code
+ | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
+ | Ext_native_spans -- ^ Use Span inlines for contents of <span>
+ | Ext_bracketed_spans -- ^ Bracketed spans with attributes
+ | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
+ -- iff container has attribute 'markdown'
+ | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
+ | Ext_link_attributes -- ^ link and image attributes
+ | Ext_mmd_link_attributes -- ^ MMD style reference link attributes
+ | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links
+ | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters
+ | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
+ | Ext_startnum -- ^ Make start number of ordered list significant
+ | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
+ | Ext_compact_definition_lists -- ^ Definition lists without
+ -- space between items, and disallow laziness
+ | Ext_example_lists -- ^ Markdown-style numbered examples
+ | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
+ | Ext_angle_brackets_escapable -- ^ Make < and > escapable
+ | Ext_intraword_underscores -- ^ Treat underscore inside word as literal
+ | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
+ | Ext_blank_before_header -- ^ Require blank line before a header
+ | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax
+ | Ext_superscript -- ^ Superscript using ^this^ syntax
+ | Ext_subscript -- ^ Subscript using ~this~ syntax
+ | Ext_hard_line_breaks -- ^ All newlines become hard line breaks
+ | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
+ | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+ -- East Asian wide characters
+ | Ext_literate_haskell -- ^ Enable literate Haskell conventions
+ | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
+ | Ext_emoji -- ^ Support emoji like :smile:
+ | Ext_auto_identifiers -- ^ Automatic identifiers for headers
+ | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers
+ | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v}
+ | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
+ | Ext_implicit_header_references -- ^ Implicit reference links for headers
+ | Ext_line_blocks -- ^ RST style line blocks
+ | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
+ | Ext_shortcut_reference_links -- ^ Shortcut reference links
+ | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
+ | Ext_old_dashes -- ^ -- = em, - before number = en
+ deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
+
+pandocExtensions :: Extensions
+pandocExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_inline_notes
+ , Ext_pandoc_title_block
+ , Ext_yaml_metadata_block
+ , Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_pipe_tables
+ , Ext_citations
+ , Ext_raw_tex
+ , Ext_raw_html
+ , Ext_tex_math_dollars
+ , Ext_latex_macros
+ , Ext_fenced_code_blocks
+ , Ext_fenced_code_attributes
+ , Ext_backtick_code_blocks
+ , Ext_inline_code_attributes
+ , Ext_markdown_in_html_blocks
+ , Ext_native_divs
+ , Ext_native_spans
+ , Ext_bracketed_spans
+ , Ext_escaped_line_breaks
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_all_symbols_escapable
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_strikeout
+ , Ext_superscript
+ , Ext_subscript
+ , Ext_auto_identifiers
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_implicit_header_references
+ , Ext_line_blocks
+ , Ext_shortcut_reference_links
+ , Ext_smart
+ ]
+
+plainExtensions :: Extensions
+plainExtensions = extensionsFromList
+ [ Ext_table_captions
+ , Ext_implicit_figures
+ , Ext_simple_tables
+ , Ext_multiline_tables
+ , Ext_grid_tables
+ , Ext_latex_macros
+ , Ext_fancy_lists
+ , Ext_startnum
+ , Ext_definition_lists
+ , Ext_example_lists
+ , Ext_intraword_underscores
+ , Ext_blank_before_blockquote
+ , Ext_blank_before_header
+ , Ext_strikeout
+ ]
+
+phpMarkdownExtraExtensions :: Extensions
+phpMarkdownExtraExtensions = extensionsFromList
+ [ Ext_footnotes
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_fenced_code_blocks
+ , Ext_definition_lists
+ , Ext_intraword_underscores
+ , Ext_header_attributes
+ , Ext_link_attributes
+ , Ext_abbreviations
+ , Ext_shortcut_reference_links
+ ]
+
+githubMarkdownExtensions :: Extensions
+githubMarkdownExtensions = extensionsFromList
+ [ Ext_angle_brackets_escapable
+ , Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_fenced_code_blocks
+ , Ext_auto_identifiers
+ , Ext_ascii_identifiers
+ , Ext_backtick_code_blocks
+ , Ext_autolink_bare_uris
+ , Ext_intraword_underscores
+ , Ext_strikeout
+ , Ext_hard_line_breaks
+ , Ext_emoji
+ , Ext_lists_without_preceding_blankline
+ , Ext_shortcut_reference_links
+ ]
+
+multimarkdownExtensions :: Extensions
+multimarkdownExtensions = extensionsFromList
+ [ Ext_pipe_tables
+ , Ext_raw_html
+ , Ext_markdown_attribute
+ , Ext_mmd_link_attributes
+ -- , Ext_raw_tex
+ -- Note: MMD's raw TeX syntax requires raw TeX to be
+ -- enclosed in HTML comment
+ , Ext_tex_math_double_backslash
+ , Ext_intraword_underscores
+ , Ext_mmd_title_block
+ , Ext_footnotes
+ , Ext_definition_lists
+ , Ext_all_symbols_escapable
+ , Ext_implicit_header_references
+ , Ext_auto_identifiers
+ , Ext_mmd_header_identifiers
+ , Ext_implicit_figures
+ -- Note: MMD's syntax for superscripts and subscripts
+ -- is a bit more permissive than pandoc's, allowing
+ -- e^2 and a~1 instead of e^2^ and a~1~, so even with
+ -- these options we don't have full support for MMD
+ -- superscripts and subscripts, but there's no reason
+ -- not to include these:
+ , Ext_superscript
+ , Ext_subscript
+ ]
+
+strictExtensions :: Extensions
+strictExtensions = extensionsFromList
+ [ Ext_raw_html
+ , Ext_shortcut_reference_links
+ ]
+
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
new file mode 100644
index 000000000..df060915c
--- /dev/null
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -0,0 +1,223 @@
+{-
+Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Highlighting
+ Copyright : Copyright (C) 2008-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Exports functions for syntax highlighting.
+-}
+
+module Text.Pandoc.Highlighting ( highlightingStyles
+ , languages
+ , languagesByExtension
+ , highlight
+ , formatLaTeXInline
+ , formatLaTeXBlock
+ , styleToLaTeX
+ , formatHtmlInline
+ , formatHtmlBlock
+ , styleToCss
+ , pygments
+ , espresso
+ , zenburn
+ , tango
+ , kate
+ , monochrome
+ , haddock
+ , Style
+ , fromListingsLanguage
+ , toListingsLanguage
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
+import Skylighting
+import Data.Maybe (fromMaybe)
+import Data.Char (toLower)
+import qualified Data.Map as M
+import Control.Monad
+import qualified Data.Text as T
+
+highlightingStyles :: [(String, Style)]
+highlightingStyles =
+ [("pygments", pygments),
+ ("tango", tango),
+ ("espresso", espresso),
+ ("zenburn", zenburn),
+ ("kate", kate),
+ ("monochrome", monochrome),
+ ("breezedark", breezeDark),
+ ("haddock", haddock)]
+
+languages :: [String]
+languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap]
+
+languagesByExtension :: String -> [String]
+languagesByExtension ext =
+ [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext]
+
+highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
+ -> Attr -- ^ Attributes of the CodeBlock
+ -> String -- ^ Raw contents of the CodeBlock
+ -> Maybe a -- ^ Maybe the formatted result
+highlight formatter (_, classes, keyvals) rawCode =
+ let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals))
+ fmtOpts = defaultFormatOpts{
+ startNumber = firstNum,
+ numberLines = any (`elem`
+ ["number","numberLines", "number-lines"]) classes }
+ tokenizeOpts = TokenizerConfig{ syntaxMap = defaultSyntaxMap
+ , traceOutput = False }
+ classes' = map T.pack classes
+ rawCode' = T.pack rawCode
+ in case msum (map (\l -> lookupSyntax l defaultSyntaxMap) classes') of
+ Nothing
+ | numberLines fmtOpts -> Just
+ $ formatter fmtOpts{ codeClasses = [],
+ containerClasses = classes' }
+ $ map (\ln -> [(NormalTok, ln)]) $ T.lines rawCode'
+ | otherwise -> Nothing
+ Just syntax ->
+ case tokenize tokenizeOpts syntax rawCode' of
+ Right slines -> Just $
+ formatter fmtOpts{ codeClasses =
+ [T.toLower (sShortname syntax)],
+ containerClasses = classes' } slines
+ Left _ -> Nothing
+
+-- Functions for correlating latex listings package's language names
+-- with skylighting language names:
+
+langToListingsMap :: M.Map String String
+langToListingsMap = M.fromList langsList
+
+listingsToLangMap :: M.Map String String
+listingsToLangMap = M.fromList $ map switch langsList
+ where switch (a,b) = (b,a)
+
+langsList :: [(String, String)]
+langsList =
+ [("abap","ABAP"),
+ ("acm","ACM"),
+ ("acmscript","ACMscript"),
+ ("acsl","ACSL"),
+ ("ada","Ada"),
+ ("algol","Algol"),
+ ("ant","Ant"),
+ ("assembler","Assembler"),
+ ("gnuassembler","Assembler"),
+ ("awk","Awk"),
+ ("bash","bash"),
+ ("monobasic","Basic"),
+ ("purebasic","Basic"),
+ ("c","C"),
+ ("cpp","C++"),
+ ("c++","C++"),
+ ("ocaml","Caml"),
+ ("cil","CIL"),
+ ("clean","Clean"),
+ ("cobol","Cobol"),
+ ("comal80","Comal80"),
+ ("command.com","command.com"),
+ ("comsol","Comsol"),
+ ("csh","csh"),
+ ("delphi","Delphi"),
+ ("elan","Elan"),
+ ("erlang","erlang"),
+ ("euphoria","Euphoria"),
+ ("fortran","Fortran"),
+ ("gap","GAP"),
+ ("gcl","GCL"),
+ ("gnuplot","Gnuplot"),
+ ("hansl","hansl"),
+ ("haskell","Haskell"),
+ ("html","HTML"),
+ ("idl","IDL"),
+ ("inform","inform"),
+ ("java","Java"),
+ ("jvmis","JVMIS"),
+ ("ksh","ksh"),
+ ("lingo","Lingo"),
+ ("lisp","Lisp"),
+ ("commonlisp","Lisp"),
+ ("llvm","LLVM"),
+ ("logo","Logo"),
+ ("lua","Lua"),
+ ("make","make"),
+ ("makefile","make"),
+ ("mathematica","Mathematica"),
+ ("matlab","Matlab"),
+ ("mercury","Mercury"),
+ ("metapost","MetaPost"),
+ ("miranda","Miranda"),
+ ("mizar","Mizar"),
+ ("ml","ML"),
+ ("modula2","Modula-2"),
+ ("mupad","MuPAD"),
+ ("nastran","NASTRAN"),
+ ("oberon2","Oberon-2"),
+ ("ocl","OCL"),
+ ("octave","Octave"),
+ ("oz","Oz"),
+ ("pascal","Pascal"),
+ ("perl","Perl"),
+ ("php","PHP"),
+ ("pli","PL/I"),
+ ("plasm","Plasm"),
+ ("postscript","PostScript"),
+ ("pov","POV"),
+ ("prolog","Prolog"),
+ ("promela","Promela"),
+ ("pstricks","PSTricks"),
+ ("python","Python"),
+ ("r","R"),
+ ("reduce","Reduce"),
+ ("rexx","Rexx"),
+ ("rsl","RSL"),
+ ("ruby","Ruby"),
+ ("s","S"),
+ ("sas","SAS"),
+ ("scala","Scala"),
+ ("scilab","Scilab"),
+ ("sh","sh"),
+ ("shelxl","SHELXL"),
+ ("simula","Simula"),
+ ("sparql","SPARQL"),
+ ("sql","SQL"),
+ ("tcl","tcl"),
+ ("tex","TeX"),
+ ("latex","TeX"),
+ ("vbscript","VBScript"),
+ ("verilog","Verilog"),
+ ("vhdl","VHDL"),
+ ("vrml","VRML"),
+ ("xml","XML"),
+ ("xslt","XSLT")]
+
+-- | Determine listings language name from skylighting language name.
+toListingsLanguage :: String -> Maybe String
+toListingsLanguage lang = M.lookup (map toLower lang) langToListingsMap
+
+-- | Determine skylighting language name from listings language name.
+fromListingsLanguage :: String -> Maybe String
+fromListingsLanguage lang = M.lookup lang listingsToLangMap
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
new file mode 100644
index 000000000..cc22c06ca
--- /dev/null
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -0,0 +1,547 @@
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
+{-
+ Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+Module : Text.Pandoc.ImageSize
+Copyright : Copyright (C) 2011-2016 John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : John MacFarlane <jgm@berkeley.edu>
+Stability : alpha
+Portability : portable
+
+Functions for determining the size of a PNG, JPEG, or GIF image.
+-}
+module Text.Pandoc.ImageSize ( ImageType(..)
+ , imageType
+ , imageSize
+ , sizeInPixels
+ , sizeInPoints
+ , desiredSizeInPoints
+ , Dimension(..)
+ , Direction(..)
+ , dimension
+ , inInch
+ , inPoints
+ , numUnit
+ , showInInch
+ , showInPixel
+ , showFl
+ ) where
+import Data.ByteString (ByteString, unpack)
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import Data.Char (isDigit)
+import Control.Monad
+import Data.Bits
+import Data.Binary
+import Data.Binary.Get
+import Text.Pandoc.Shared (safeRead)
+import Data.Default (Default)
+import Numeric (showFFloat)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import qualified Data.Map as M
+import Control.Monad.Except
+import Data.Maybe (fromMaybe)
+
+-- quick and dirty functions to get image sizes
+-- algorithms borrowed from wwwis.pl
+
+data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show
+data Direction = Width | Height
+instance Show Direction where
+ show Width = "width"
+ show Height = "height"
+
+data Dimension = Pixel Integer
+ | Centimeter Double
+ | Inch Double
+ | Percent Double
+instance Show Dimension where
+ show (Pixel a) = show a ++ "px"
+ show (Centimeter a) = showFl a ++ "cm"
+ show (Inch a) = showFl a ++ "in"
+ show (Percent a) = show a ++ "%"
+
+data ImageSize = ImageSize{
+ pxX :: Integer
+ , pxY :: Integer
+ , dpiX :: Integer
+ , dpiY :: Integer
+ } deriving (Read, Show, Eq)
+instance Default ImageSize where
+ def = ImageSize 300 200 72 72
+
+showFl :: (RealFloat a) => a -> String
+showFl a = showFFloat (Just 5) a ""
+
+imageType :: ByteString -> Maybe ImageType
+imageType img = case B.take 4 img of
+ "\x89\x50\x4e\x47" -> return Png
+ "\x47\x49\x46\x38" -> return Gif
+ "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF
+ "\xff\xd8\xff\xe1" -> return Jpeg -- Exif
+ "%PDF" -> return Pdf
+ "%!PS"
+ | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
+ -> return Eps
+ _ -> mzero
+
+imageSize :: ByteString -> Either String ImageSize
+imageSize img =
+ case imageType img of
+ Just Png -> mbToEither "could not determine PNG size" $ pngSize img
+ Just Gif -> mbToEither "could not determine GIF size" $ gifSize img
+ Just Jpeg -> jpegSize img
+ Just Eps -> mbToEither "could not determine EPS size" $ epsSize img
+ Just Pdf -> Left "could not determine PDF size" -- TODO
+ Nothing -> Left "could not determine image type"
+ where mbToEither msg Nothing = Left msg
+ mbToEither _ (Just x) = Right x
+
+defaultSize :: (Integer, Integer)
+defaultSize = (72, 72)
+
+sizeInPixels :: ImageSize -> (Integer, Integer)
+sizeInPixels s = (pxX s, pxY s)
+
+-- | Calculate (height, width) in points using the image file's dpi metadata,
+-- using 72 Points == 1 Inch.
+sizeInPoints :: ImageSize -> (Double, Double)
+sizeInPoints s = (pxXf * 72 / dpiXf, pxYf * 72 / dpiYf)
+ where
+ pxXf = fromIntegral $ pxX s
+ pxYf = fromIntegral $ pxY s
+ dpiXf = fromIntegral $ dpiX s
+ dpiYf = fromIntegral $ dpiY s
+
+-- | Calculate (height, width) in points, considering the desired dimensions in the
+-- attribute, while falling back on the image file's dpi metadata if no dimensions
+-- are specified in the attribute (or only dimensions in percentages).
+desiredSizeInPoints :: WriterOptions -> Attr -> ImageSize -> (Double, Double)
+desiredSizeInPoints opts attr s =
+ case (getDim Width, getDim Height) of
+ (Just w, Just h) -> (w, h)
+ (Just w, Nothing) -> (w, w / ratio)
+ (Nothing, Just h) -> (h * ratio, h)
+ (Nothing, Nothing) -> sizeInPoints s
+ where
+ ratio = fromIntegral (pxX s) / fromIntegral (pxY s)
+ getDim dir = case (dimension dir attr) of
+ Just (Percent _) -> Nothing
+ Just dim -> Just $ inPoints opts dim
+ Nothing -> Nothing
+
+inPoints :: WriterOptions -> Dimension -> Double
+inPoints opts dim = 72 * inInch opts dim
+
+inInch :: WriterOptions -> Dimension -> Double
+inInch opts dim =
+ case dim of
+ (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts)
+ (Centimeter a) -> a * 0.3937007874
+ (Inch a) -> a
+ (Percent _) -> 0
+
+-- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000".
+-- Note: Dimensions in percentages are converted to the empty string.
+showInInch :: WriterOptions -> Dimension -> String
+showInInch _ (Percent _) = ""
+showInInch opts dim = showFl $ inInch opts dim
+
+-- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600".
+-- Note: Dimensions in percentages are converted to the empty string.
+showInPixel :: WriterOptions -> Dimension -> String
+showInPixel opts dim =
+ case dim of
+ (Pixel a) -> show a
+ (Centimeter a) -> show (floor $ dpi * a * 0.3937007874 :: Int)
+ (Inch a) -> show (floor $ dpi * a :: Int)
+ (Percent _) -> ""
+ where
+ dpi = fromIntegral $ writerDpi opts
+
+-- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm")
+numUnit :: String -> Maybe (Double, String)
+numUnit s =
+ let (nums, unit) = span (\c -> isDigit c || ('.'==c)) s
+ in case safeRead nums of
+ Just n -> Just (n, unit)
+ Nothing -> Nothing
+
+-- | Read a Dimension from an Attr attribute.
+-- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc.
+dimension :: Direction -> Attr -> Maybe Dimension
+dimension dir (_, _, kvs) =
+ case dir of
+ Width -> extractDim "width"
+ Height -> extractDim "height"
+ where
+ extractDim key =
+ case lookup key kvs of
+ Just str ->
+ case numUnit str of
+ Just (num, unit) -> toDim num unit
+ Nothing -> Nothing
+ Nothing -> Nothing
+ toDim a "cm" = Just $ Centimeter a
+ toDim a "mm" = Just $ Centimeter (a / 10)
+ toDim a "in" = Just $ Inch a
+ toDim a "inch" = Just $ Inch a
+ toDim a "%" = Just $ Percent a
+ toDim a "px" = Just $ Pixel (floor a::Integer)
+ toDim a "" = Just $ Pixel (floor a::Integer)
+ toDim _ _ = Nothing
+
+epsSize :: ByteString -> Maybe ImageSize
+epsSize img = do
+ let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img
+ let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls
+ case ls' of
+ [] -> mzero
+ (x:_) -> case B.words x of
+ (_:_:_:ux:uy:[]) -> do
+ ux' <- safeRead $ B.unpack ux
+ uy' <- safeRead $ B.unpack uy
+ return ImageSize{
+ pxX = ux'
+ , pxY = uy'
+ , dpiX = 72
+ , dpiY = 72 }
+ _ -> mzero
+
+pngSize :: ByteString -> Maybe ImageSize
+pngSize img = do
+ let (h, rest) = B.splitAt 8 img
+ guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
+ h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+ let (i, rest') = B.splitAt 4 $ B.drop 4 rest
+ guard $ i == "MHDR" || i == "IHDR"
+ let (sizes, rest'') = B.splitAt 8 rest'
+ (x,y) <- case map fromIntegral $ unpack $ sizes of
+ ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
+ ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
+ (shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
+ _ -> Nothing -- "PNG parse error"
+ let (dpix, dpiy) = findpHYs rest''
+ return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
+
+findpHYs :: ByteString -> (Integer, Integer)
+findpHYs x =
+ if B.null x || "IDAT" `B.isPrefixOf` x
+ then (72,72) -- default, no pHYs
+ else if "pHYs" `B.isPrefixOf` x
+ then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral
+ $ unpack $ B.take 9 $ B.drop 4 x
+ factor = if u == 1 -- dots per meter
+ then \z -> z * 254 `div` 10000
+ else const 72
+ in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4,
+ factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 )
+ else findpHYs $ B.drop 1 x -- read another byte
+
+gifSize :: ByteString -> Maybe ImageSize
+gifSize img = do
+ let (h, rest) = B.splitAt 6 img
+ guard $ h == "GIF87a" || h == "GIF89a"
+ case map fromIntegral $ unpack $ B.take 4 rest of
+ [w2,w1,h2,h1] -> return ImageSize {
+ pxX = shift w1 8 + w2,
+ pxY = shift h1 8 + h2,
+ dpiX = 72,
+ dpiY = 72
+ }
+ _ -> Nothing -- "GIF parse error"
+
+jpegSize :: ByteString -> Either String ImageSize
+jpegSize img =
+ let (hdr, rest) = B.splitAt 4 img
+ in if B.length rest < 14
+ then Left "unable to determine JPEG size"
+ else case hdr of
+ "\xff\xd8\xff\xe0" -> jfifSize rest
+ "\xff\xd8\xff\xe1" -> exifSize rest
+ _ -> Left "unable to determine JPEG size"
+
+jfifSize :: ByteString -> Either String ImageSize
+jfifSize rest =
+ let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
+ $ unpack $ B.take 5 $ B.drop 9 $ rest
+ factor = case dpiDensity of
+ 1 -> id
+ 2 -> \x -> (x * 254 `div` 10)
+ _ -> const 72
+ dpix = factor (shift dpix1 8 + dpix2)
+ dpiy = factor (shift dpiy1 8 + dpiy2)
+ in case findJfifSize rest of
+ Left msg -> Left msg
+ Right (w,h) -> Right $ ImageSize { pxX = w
+ , pxY = h
+ , dpiX = dpix
+ , dpiY = dpiy }
+
+findJfifSize :: ByteString -> Either String (Integer,Integer)
+findJfifSize bs =
+ let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
+ in case B.uncons bs' of
+ Just (c,bs'') | c >= '\xc0' && c <= '\xc3' ->
+ case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
+ [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2)
+ _ -> Left "JFIF parse error"
+ Just (_,bs'') ->
+ case map fromIntegral $ unpack $ B.take 2 bs'' of
+ [c1,c2] ->
+ let len = shift c1 8 + c2
+ -- skip variables
+ in findJfifSize $ B.drop len bs''
+ _ -> Left "JFIF parse error"
+ Nothing -> Left "Did not find JFIF length record"
+
+runGet' :: Get (Either String a) -> BL.ByteString -> Either String a
+runGet' p bl =
+#if MIN_VERSION_binary(0,7,0)
+ case runGetOrFail p bl of
+ Left (_,_,msg) -> Left msg
+ Right (_,_,x) -> x
+#else
+ runGet p bl
+#endif
+
+
+exifSize :: ByteString -> Either String ImageSize
+exifSize bs = runGet' header $ bl
+ where bl = BL.fromChunks [bs]
+ header = runExceptT $ exifHeader bl
+-- NOTE: It would be nicer to do
+-- runGet ((Just <$> exifHeader) <|> return Nothing)
+-- which would prevent pandoc from raising an error when an exif header can't
+-- be parsed. But we only get an Alternative instance for Get in binary 0.6,
+-- and binary 0.5 ships with ghc 7.6.
+
+exifHeader :: BL.ByteString -> ExceptT String Get ImageSize
+exifHeader hdr = do
+ _app1DataSize <- lift getWord16be
+ exifHdr <- lift getWord32be
+ unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
+ zeros <- lift getWord16be
+ unless (zeros == 0) $ throwError "Expected zeros after exif header"
+ -- beginning of tiff header -- we read whole thing to use
+ -- in getting data from offsets:
+ let tiffHeader = BL.drop 8 hdr
+ byteAlign <- lift getWord16be
+ let bigEndian = byteAlign == 0x4d4d
+ let (getWord16, getWord32, getWord64) =
+ if bigEndian
+ then (getWord16be, getWord32be, getWord64be)
+ else (getWord16le, getWord32le, getWord64le)
+ let getRational = do
+ num <- getWord32
+ den <- getWord32
+ return $ fromIntegral num / fromIntegral den
+ tagmark <- lift getWord16
+ unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
+ ifdOffset <- lift getWord32
+ lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
+ numentries <- lift getWord16
+ let ifdEntry :: ExceptT String Get (TagType, DataFormat)
+ ifdEntry = do
+ tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
+ <$> lift getWord16
+ dataFormat <- lift getWord16
+ numComponents <- lift getWord32
+ (fmt, bytesPerComponent) <-
+ case dataFormat of
+ 1 -> return (UnsignedByte <$> getWord8, 1)
+ 2 -> return (AsciiString <$>
+ getLazyByteString
+ (fromIntegral numComponents), 1)
+ 3 -> return (UnsignedShort <$> getWord16, 2)
+ 4 -> return (UnsignedLong <$> getWord32, 4)
+ 5 -> return (UnsignedRational <$> getRational, 8)
+ 6 -> return (SignedByte <$> getWord8, 1)
+ 7 -> return (Undefined <$> getLazyByteString
+ (fromIntegral numComponents), 1)
+ 8 -> return (SignedShort <$> getWord16, 2)
+ 9 -> return (SignedLong <$> getWord32, 4)
+ 10 -> return (SignedRational <$> getRational, 8)
+ 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4)
+ 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8)
+ _ -> throwError $ "Unknown data format " ++ show dataFormat
+ let totalBytes = fromIntegral $ numComponents * bytesPerComponent
+ payload <- if totalBytes <= 4 -- data is right here
+ then lift $ fmt <* skip (4 - totalBytes)
+ else do -- get data from offset
+ offs <- lift getWord32
+ let bytesAtOffset =
+ BL.take (fromIntegral totalBytes)
+ $ BL.drop (fromIntegral offs) tiffHeader
+ case runGet' (Right <$> fmt) bytesAtOffset of
+ Left msg -> throwError msg
+ Right x -> return x
+ return (tag, payload)
+ entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
+ subentries <- case lookup ExifOffset entries of
+ Just (UnsignedLong offset') -> do
+ pos <- lift bytesRead
+ lift $ skip (fromIntegral offset' - (fromIntegral pos - 8))
+ numsubentries <- lift getWord16
+ sequence $
+ replicate (fromIntegral numsubentries) ifdEntry
+ _ -> return []
+ let allentries = entries ++ subentries
+ (wdth, hght) <- case (lookup ExifImageWidth allentries,
+ lookup ExifImageHeight allentries) of
+ (Just (UnsignedLong w), Just (UnsignedLong h)) ->
+ return (fromIntegral w, fromIntegral h)
+ _ -> return defaultSize
+ -- we return a default width and height when
+ -- the exif header doesn't contain these
+ let resfactor = case lookup ResolutionUnit allentries of
+ Just (UnsignedShort 1) -> (100 / 254)
+ _ -> 1
+ let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+ $ lookup XResolution allentries
+ let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+ $ lookup YResolution allentries
+ return $ ImageSize{
+ pxX = wdth
+ , pxY = hght
+ , dpiX = xres
+ , dpiY = yres }
+
+data DataFormat = UnsignedByte Word8
+ | AsciiString BL.ByteString
+ | UnsignedShort Word16
+ | UnsignedLong Word32
+ | UnsignedRational Rational
+ | SignedByte Word8
+ | Undefined BL.ByteString
+ | SignedShort Word16
+ | SignedLong Word32
+ | SignedRational Rational
+ | SingleFloat Word32
+ | DoubleFloat Word64
+ deriving (Show)
+
+data TagType = ImageDescription
+ | Make
+ | Model
+ | Orientation
+ | XResolution
+ | YResolution
+ | ResolutionUnit
+ | Software
+ | DateTime
+ | WhitePoint
+ | PrimaryChromaticities
+ | YCbCrCoefficients
+ | YCbCrPositioning
+ | ReferenceBlackWhite
+ | Copyright
+ | ExifOffset
+ | ExposureTime
+ | FNumber
+ | ExposureProgram
+ | ISOSpeedRatings
+ | ExifVersion
+ | DateTimeOriginal
+ | DateTimeDigitized
+ | ComponentConfiguration
+ | CompressedBitsPerPixel
+ | ShutterSpeedValue
+ | ApertureValue
+ | BrightnessValue
+ | ExposureBiasValue
+ | MaxApertureValue
+ | SubjectDistance
+ | MeteringMode
+ | LightSource
+ | Flash
+ | FocalLength
+ | MakerNote
+ | UserComment
+ | FlashPixVersion
+ | ColorSpace
+ | ExifImageWidth
+ | ExifImageHeight
+ | RelatedSoundFile
+ | ExifInteroperabilityOffset
+ | FocalPlaneXResolution
+ | FocalPlaneYResolution
+ | FocalPlaneResolutionUnit
+ | SensingMethod
+ | FileSource
+ | SceneType
+ | UnknownTagType
+ deriving (Show, Eq, Ord)
+
+tagTypeTable :: M.Map Word16 TagType
+tagTypeTable = M.fromList
+ [ (0x010e, ImageDescription)
+ , (0x010f, Make)
+ , (0x0110, Model)
+ , (0x0112, Orientation)
+ , (0x011a, XResolution)
+ , (0x011b, YResolution)
+ , (0x0128, ResolutionUnit)
+ , (0x0131, Software)
+ , (0x0132, DateTime)
+ , (0x013e, WhitePoint)
+ , (0x013f, PrimaryChromaticities)
+ , (0x0211, YCbCrCoefficients)
+ , (0x0213, YCbCrPositioning)
+ , (0x0214, ReferenceBlackWhite)
+ , (0x8298, Copyright)
+ , (0x8769, ExifOffset)
+ , (0x829a, ExposureTime)
+ , (0x829d, FNumber)
+ , (0x8822, ExposureProgram)
+ , (0x8827, ISOSpeedRatings)
+ , (0x9000, ExifVersion)
+ , (0x9003, DateTimeOriginal)
+ , (0x9004, DateTimeDigitized)
+ , (0x9101, ComponentConfiguration)
+ , (0x9102, CompressedBitsPerPixel)
+ , (0x9201, ShutterSpeedValue)
+ , (0x9202, ApertureValue)
+ , (0x9203, BrightnessValue)
+ , (0x9204, ExposureBiasValue)
+ , (0x9205, MaxApertureValue)
+ , (0x9206, SubjectDistance)
+ , (0x9207, MeteringMode)
+ , (0x9208, LightSource)
+ , (0x9209, Flash)
+ , (0x920a, FocalLength)
+ , (0x927c, MakerNote)
+ , (0x9286, UserComment)
+ , (0xa000, FlashPixVersion)
+ , (0xa001, ColorSpace)
+ , (0xa002, ExifImageWidth)
+ , (0xa003, ExifImageHeight)
+ , (0xa004, RelatedSoundFile)
+ , (0xa005, ExifInteroperabilityOffset)
+ , (0xa20e, FocalPlaneXResolution)
+ , (0xa20f, FocalPlaneYResolution)
+ , (0xa210, FocalPlaneResolutionUnit)
+ , (0xa217, SensingMethod)
+ , (0xa300, FileSource)
+ , (0xa301, SceneType)
+ ]
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
new file mode 100644
index 000000000..1f98d019e
--- /dev/null
+++ b/src/Text/Pandoc/Logging.hs
@@ -0,0 +1,232 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-}
+{-
+Copyright (C) 2016-17 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Logging
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This module provides data types and functions for warnings
+and info messages.
+
+-}
+module Text.Pandoc.Logging (
+ Verbosity(..)
+ , LogMessage(..)
+ , encodeLogMessages
+ , showLogMessage
+ , messageVerbosity
+ ) where
+
+import Text.Parsec.Pos
+import Data.Data (Data)
+import Data.Generics (Typeable)
+import GHC.Generics (Generic)
+import qualified Data.Text as Text
+import Data.Aeson
+import Text.Pandoc.Definition
+import Data.Aeson.Encode.Pretty (encodePretty', keyOrder,
+ defConfig, Config(..))
+import qualified Data.ByteString.Lazy as BL
+
+-- | Verbosity level.
+data Verbosity = ERROR | WARNING | INFO | DEBUG
+ deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic)
+
+instance ToJSON Verbosity where
+ toJSON x = toJSON (show x)
+
+data LogMessage =
+ SkippedContent String SourcePos
+ | CouldNotParseYamlMetadata String SourcePos
+ | DuplicateLinkReference String SourcePos
+ | DuplicateNoteReference String SourcePos
+ | ReferenceNotFound String SourcePos
+ | CircularReference String SourcePos
+ | ParsingUnescaped String SourcePos
+ | CouldNotLoadIncludeFile String SourcePos
+ | ParsingTrace String SourcePos
+ | InlineNotRendered Inline
+ | BlockNotRendered Block
+ | DocxParserWarning String
+ | CouldNotFetchResource String String
+ | CouldNotDetermineImageSize String String
+ | CouldNotDetermineMimeType String
+ | CouldNotConvertTeXMath String String
+ deriving (Show, Eq, Data, Ord, Typeable, Generic)
+
+instance ToJSON LogMessage where
+ toJSON x = object $ "verbosity" .= toJSON (messageVerbosity x) :
+ case x of
+ SkippedContent s pos ->
+ ["type" .= String "SkippedContent",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= sourceLine pos,
+ "column" .= sourceColumn pos]
+ CouldNotParseYamlMetadata s pos ->
+ ["type" .= String "YamlSectionNotAnObject",
+ "message" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ DuplicateLinkReference s pos ->
+ ["type" .= String "DuplicateLinkReference",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ DuplicateNoteReference s pos ->
+ ["type" .= String "DuplicateNoteReference",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ ReferenceNotFound s pos ->
+ ["type" .= String "ReferenceNotFound",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ CircularReference s pos ->
+ ["type" .= String "CircularReference",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ ParsingUnescaped s pos ->
+ ["type" .= String "ParsingUnescaped",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ CouldNotLoadIncludeFile fp pos ->
+ ["type" .= String "CouldNotLoadIncludeFile",
+ "path" .= Text.pack fp,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
+ ParsingTrace s pos ->
+ ["type" .= String "ParsingTrace",
+ "contents" .= Text.pack s,
+ "source" .= Text.pack (sourceName pos),
+ "line" .= sourceLine pos,
+ "column" .= sourceColumn pos]
+ InlineNotRendered il ->
+ ["type" .= String "InlineNotRendered",
+ "contents" .= toJSON il]
+ BlockNotRendered bl ->
+ ["type" .= String "BlockNotRendered",
+ "contents" .= toJSON bl]
+ DocxParserWarning s ->
+ ["type" .= String "DocxParserWarning",
+ "contents" .= Text.pack s]
+ CouldNotFetchResource fp s ->
+ ["type" .= String "CouldNotFetchResource",
+ "path" .= Text.pack fp,
+ "message" .= Text.pack s]
+ CouldNotDetermineImageSize fp s ->
+ ["type" .= String "CouldNotDetermineImageSize",
+ "path" .= Text.pack fp,
+ "message" .= Text.pack s]
+ CouldNotDetermineMimeType fp ->
+ ["type" .= String "CouldNotDetermineMimeType",
+ "path" .= Text.pack fp]
+ CouldNotConvertTeXMath s msg ->
+ ["type" .= String "CouldNotConvertTeXMath",
+ "contents" .= Text.pack s,
+ "message" .= Text.pack msg]
+
+showPos :: SourcePos -> String
+showPos pos = sn ++ "line " ++
+ show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
+ where sn = if sourceName pos == "source" || sourceName pos == ""
+ then ""
+ else sourceName pos ++ " "
+
+encodeLogMessages :: [LogMessage] -> BL.ByteString
+encodeLogMessages ms =
+ encodePretty' defConfig{ confCompare =
+ keyOrder [ "type", "verbosity", "contents", "message", "path",
+ "source", "line", "column" ] } ms
+
+showLogMessage :: LogMessage -> String
+showLogMessage msg =
+ case msg of
+ SkippedContent s pos ->
+ "Skipped '" ++ s ++ "' at " ++ showPos pos
+ CouldNotParseYamlMetadata s pos ->
+ "Could not parse YAML metadata at " ++ showPos pos ++
+ if null s then "" else (": " ++ s)
+ DuplicateLinkReference s pos ->
+ "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos
+ DuplicateNoteReference s pos ->
+ "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos
+ ReferenceNotFound s pos ->
+ "Reference not found for '" ++ s ++ "' at " ++ showPos pos
+ CircularReference s pos ->
+ "Circular reference '" ++ s ++ "' at " ++ showPos pos
+ ParsingUnescaped s pos ->
+ "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos
+ CouldNotLoadIncludeFile fp pos ->
+ "Could not load include file '" ++ fp ++ "' at " ++ showPos pos
+ ParsingTrace s pos ->
+ "Parsing trace at " ++ showPos pos ++ ": " ++ s
+ InlineNotRendered il ->
+ "Not rendering " ++ show il
+ BlockNotRendered bl ->
+ "Not rendering " ++ show bl
+ DocxParserWarning s ->
+ "Docx parser warning: " ++ s
+ CouldNotFetchResource fp s ->
+ "Could not fetch resource '" ++ fp ++ "'" ++
+ if null s then "" else (": " ++ s)
+ CouldNotDetermineImageSize fp s ->
+ "Could not determine image size for '" ++ fp ++ "'" ++
+ if null s then "" else (": " ++ s)
+ CouldNotDetermineMimeType fp ->
+ "Could not determine mime type for '" ++ fp ++ "'"
+ CouldNotConvertTeXMath s m ->
+ "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++
+ if null m then "" else (':':'\n':m)
+
+messageVerbosity:: LogMessage -> Verbosity
+messageVerbosity msg =
+ case msg of
+ SkippedContent{} -> INFO
+ CouldNotParseYamlMetadata{} -> WARNING
+ DuplicateLinkReference{} -> WARNING
+ DuplicateNoteReference{} -> WARNING
+ ReferenceNotFound{} -> WARNING
+ CircularReference{} -> WARNING
+ CouldNotLoadIncludeFile{} -> WARNING
+ ParsingUnescaped{} -> INFO
+ ParsingTrace{} -> DEBUG
+ InlineNotRendered{} -> INFO
+ BlockNotRendered{} -> INFO
+ DocxParserWarning{} -> WARNING
+ CouldNotFetchResource{} -> WARNING
+ CouldNotDetermineImageSize{} -> WARNING
+ CouldNotDetermineMimeType{} -> WARNING
+ CouldNotConvertTeXMath{} -> WARNING
+
+
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
new file mode 100644
index 000000000..a08091217
--- /dev/null
+++ b/src/Text/Pandoc/MIME.hs
@@ -0,0 +1,527 @@
+{-
+Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MIME
+ Copyright : Copyright (C) 2011-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Mime type lookup for ODT writer.
+-}
+module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
+ extensionFromMimeType )where
+import System.FilePath
+import Data.Char ( toLower )
+import Data.List (isPrefixOf, isSuffixOf)
+import Data.Maybe (fromMaybe)
+import qualified Data.Map as M
+
+type MimeType = String
+
+-- | Determine mime type appropriate for file path.
+getMimeType :: FilePath -> Maybe MimeType
+getMimeType fp
+ -- ODT
+ | fp == "layout-cache" =
+ Just "application/binary"
+ | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp =
+ Just "application/vnd.oasis.opendocument.formula"
+ -- generic
+ | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes
+
+-- | Determime mime type appropriate for file path, defaulting to
+-- “application/octet-stream” if nothing else fits.
+getMimeTypeDef :: FilePath -> MimeType
+getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType
+
+extensionFromMimeType :: MimeType -> Maybe String
+extensionFromMimeType mimetype =
+ M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
+ -- note: we just look up the basic mime type, dropping the content-encoding etc.
+
+reverseMimeTypes :: M.Map MimeType String
+reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
+
+mimeTypes :: M.Map String MimeType
+mimeTypes = M.fromList mimeTypesList
+
+mimeTypesList :: [(String, MimeType)]
+mimeTypesList = -- List borrowed from happstack-server.
+ [("gz","application/x-gzip")
+ ,("cabal","application/x-cabal")
+ ,("%","application/x-trash")
+ ,("323","text/h323")
+ ,("3gp","video/3gpp")
+ ,("7z","application/x-7z-compressed")
+ ,("abw","application/x-abiword")
+ ,("ai","application/postscript")
+ ,("aif","audio/x-aiff")
+ ,("aifc","audio/x-aiff")
+ ,("aiff","audio/x-aiff")
+ ,("alc","chemical/x-alchemy")
+ ,("art","image/x-jg")
+ ,("asc","text/plain")
+ ,("asf","video/x-ms-asf")
+ ,("asn","chemical/x-ncbi-asn1")
+ ,("aso","chemical/x-ncbi-asn1-binary")
+ ,("asx","video/x-ms-asf")
+ ,("atom","application/atom")
+ ,("atomcat","application/atomcat+xml")
+ ,("atomsrv","application/atomserv+xml")
+ ,("au","audio/basic")
+ ,("avi","video/x-msvideo")
+ ,("b","chemical/x-molconn-Z")
+ ,("bak","application/x-trash")
+ ,("bat","application/x-msdos-program")
+ ,("bcpio","application/x-bcpio")
+ ,("bib","text/x-bibtex")
+ ,("bin","application/octet-stream")
+ ,("bmp","image/x-ms-bmp")
+ ,("boo","text/x-boo")
+ ,("book","application/x-maker")
+ ,("bsd","chemical/x-crossfire")
+ ,("c","text/x-csrc")
+ ,("c++","text/x-c++src")
+ ,("c3d","chemical/x-chem3d")
+ ,("cab","application/x-cab")
+ ,("cac","chemical/x-cache")
+ ,("cache","chemical/x-cache")
+ ,("cap","application/cap")
+ ,("cascii","chemical/x-cactvs-binary")
+ ,("cat","application/vnd.ms-pki.seccat")
+ ,("cbin","chemical/x-cactvs-binary")
+ ,("cbr","application/x-cbr")
+ ,("cbz","application/x-cbz")
+ ,("cc","text/x-c++src")
+ ,("cdf","application/x-cdf")
+ ,("cdr","image/x-coreldraw")
+ ,("cdt","image/x-coreldrawtemplate")
+ ,("cdx","chemical/x-cdx")
+ ,("cdy","application/vnd.cinderella")
+ ,("cef","chemical/x-cxf")
+ ,("cer","chemical/x-cerius")
+ ,("chm","chemical/x-chemdraw")
+ ,("chrt","application/x-kchart")
+ ,("cif","chemical/x-cif")
+ ,("class","application/java-vm")
+ ,("cls","text/x-tex")
+ ,("cmdf","chemical/x-cmdf")
+ ,("cml","chemical/x-cml")
+ ,("cod","application/vnd.rim.cod")
+ ,("com","application/x-msdos-program")
+ ,("cpa","chemical/x-compass")
+ ,("cpio","application/x-cpio")
+ ,("cpp","text/x-c++src")
+ ,("cpt","application/mac-compactpro")
+ ,("crl","application/x-pkcs7-crl")
+ ,("crt","application/x-x509-ca-cert")
+ ,("csf","chemical/x-cache-csf")
+ ,("csh","application/x-csh")
+ ,("csm","chemical/x-csml")
+ ,("csml","chemical/x-csml")
+ ,("css","text/css")
+ ,("csv","text/csv")
+ ,("ctab","chemical/x-cactvs-binary")
+ ,("ctx","chemical/x-ctx")
+ ,("cu","application/cu-seeme")
+ ,("cub","chemical/x-gaussian-cube")
+ ,("cxf","chemical/x-cxf")
+ ,("cxx","text/x-c++src")
+ ,("d","text/x-dsrc")
+ ,("dat","chemical/x-mopac-input")
+ ,("dcr","application/x-director")
+ ,("deb","application/x-debian-package")
+ ,("dif","video/dv")
+ ,("diff","text/x-diff")
+ ,("dir","application/x-director")
+ ,("djv","image/vnd.djvu")
+ ,("djvu","image/vnd.djvu")
+ ,("dl","video/dl")
+ ,("dll","application/x-msdos-program")
+ ,("dmg","application/x-apple-diskimage")
+ ,("dms","application/x-dms")
+ ,("doc","application/msword")
+ ,("dot","application/msword")
+ ,("dv","video/dv")
+ ,("dvi","application/x-dvi")
+ ,("dx","chemical/x-jcamp-dx")
+ ,("dxr","application/x-director")
+ ,("emb","chemical/x-embl-dl-nucleotide")
+ ,("embl","chemical/x-embl-dl-nucleotide")
+ ,("emf","image/x-emf")
+ ,("eml","message/rfc822")
+ ,("ent","chemical/x-ncbi-asn1-ascii")
+ ,("eot","application/vnd.ms-fontobject")
+ ,("eps","application/postscript")
+ ,("etx","text/x-setext")
+ ,("exe","application/x-msdos-program")
+ ,("ez","application/andrew-inset")
+ ,("fb","application/x-maker")
+ ,("fbdoc","application/x-maker")
+ ,("fch","chemical/x-gaussian-checkpoint")
+ ,("fchk","chemical/x-gaussian-checkpoint")
+ ,("fig","application/x-xfig")
+ ,("flac","application/x-flac")
+ ,("fli","video/fli")
+ ,("fm","application/x-maker")
+ ,("frame","application/x-maker")
+ ,("frm","application/x-maker")
+ ,("fs","text/plain")
+ ,("gal","chemical/x-gaussian-log")
+ ,("gam","chemical/x-gamess-input")
+ ,("gamin","chemical/x-gamess-input")
+ ,("gau","chemical/x-gaussian-input")
+ ,("gcd","text/x-pcs-gcd")
+ ,("gcf","application/x-graphing-calculator")
+ ,("gcg","chemical/x-gcg8-sequence")
+ ,("gen","chemical/x-genbank")
+ ,("gf","application/x-tex-gf")
+ ,("gif","image/gif")
+ ,("gjc","chemical/x-gaussian-input")
+ ,("gjf","chemical/x-gaussian-input")
+ ,("gl","video/gl")
+ ,("gnumeric","application/x-gnumeric")
+ ,("gpt","chemical/x-mopac-graph")
+ ,("gsf","application/x-font")
+ ,("gsm","audio/x-gsm")
+ ,("gtar","application/x-gtar")
+ ,("h","text/x-chdr")
+ ,("h++","text/x-c++hdr")
+ ,("hdf","application/x-hdf")
+ ,("hh","text/x-c++hdr")
+ ,("hin","chemical/x-hin")
+ ,("hpp","text/x-c++hdr")
+ ,("hqx","application/mac-binhex40")
+ ,("hs","text/x-haskell")
+ ,("hta","application/hta")
+ ,("htc","text/x-component")
+ ,("htm","text/html")
+ ,("html","text/html")
+ ,("hxx","text/x-c++hdr")
+ ,("ica","application/x-ica")
+ ,("ice","x-conference/x-cooltalk")
+ ,("ico","image/x-icon")
+ ,("ics","text/calendar")
+ ,("icz","text/calendar")
+ ,("ief","image/ief")
+ ,("iges","model/iges")
+ ,("igs","model/iges")
+ ,("iii","application/x-iphone")
+ ,("inp","chemical/x-gamess-input")
+ ,("ins","application/x-internet-signup")
+ ,("iso","application/x-iso9660-image")
+ ,("isp","application/x-internet-signup")
+ ,("ist","chemical/x-isostar")
+ ,("istr","chemical/x-isostar")
+ ,("jad","text/vnd.sun.j2me.app-descriptor")
+ ,("jar","application/java-archive")
+ ,("java","text/x-java")
+ ,("jdx","chemical/x-jcamp-dx")
+ ,("jmz","application/x-jmol")
+ ,("jng","image/x-jng")
+ ,("jnlp","application/x-java-jnlp-file")
+ ,("jpe","image/jpeg")
+ ,("jpeg","image/jpeg")
+ ,("jfif","image/jpeg")
+ ,("jpg","image/jpeg")
+ ,("js","application/x-javascript")
+ ,("kar","audio/midi")
+ ,("key","application/pgp-keys")
+ ,("kil","application/x-killustrator")
+ ,("kin","chemical/x-kinemage")
+ ,("kml","application/vnd.google-earth.kml+xml")
+ ,("kmz","application/vnd.google-earth.kmz")
+ ,("kpr","application/x-kpresenter")
+ ,("kpt","application/x-kpresenter")
+ ,("ksp","application/x-kspread")
+ ,("kwd","application/x-kword")
+ ,("kwt","application/x-kword")
+ ,("latex","application/x-latex")
+ ,("lha","application/x-lha")
+ ,("lhs","text/x-literate-haskell")
+ ,("lsf","video/x-la-asf")
+ ,("lsx","video/x-la-asf")
+ ,("ltx","text/x-tex")
+ ,("lyx","application/x-lyx")
+ ,("lzh","application/x-lzh")
+ ,("lzx","application/x-lzx")
+ ,("m3u","audio/mpegurl")
+ ,("m4a","audio/mpeg")
+ ,("m4v","video/x-m4v")
+ ,("maker","application/x-maker")
+ ,("man","application/x-troff-man")
+ ,("mcif","chemical/x-mmcif")
+ ,("mcm","chemical/x-macmolecule")
+ ,("mdb","application/msaccess")
+ ,("me","application/x-troff-me")
+ ,("mesh","model/mesh")
+ ,("mid","audio/midi")
+ ,("midi","audio/midi")
+ ,("mif","application/x-mif")
+ ,("mm","application/x-freemind")
+ ,("mmd","chemical/x-macromodel-input")
+ ,("mmf","application/vnd.smaf")
+ ,("mml","text/mathml")
+ ,("mmod","chemical/x-macromodel-input")
+ ,("mng","video/x-mng")
+ ,("moc","text/x-moc")
+ ,("mol","chemical/x-mdl-molfile")
+ ,("mol2","chemical/x-mol2")
+ ,("moo","chemical/x-mopac-out")
+ ,("mop","chemical/x-mopac-input")
+ ,("mopcrt","chemical/x-mopac-input")
+ ,("mov","video/quicktime")
+ ,("movie","video/x-sgi-movie")
+ ,("mp2","audio/mpeg")
+ ,("mp3","audio/mpeg")
+ ,("mp4","video/mp4")
+ ,("mpc","chemical/x-mopac-input")
+ ,("mpe","video/mpeg")
+ ,("mpeg","video/mpeg")
+ ,("mpega","audio/mpeg")
+ ,("mpg","video/mpeg")
+ ,("mpga","audio/mpeg")
+ ,("ms","application/x-troff-ms")
+ ,("msh","model/mesh")
+ ,("msi","application/x-msi")
+ ,("mvb","chemical/x-mopac-vib")
+ ,("mxu","video/vnd.mpegurl")
+ ,("nb","application/mathematica")
+ ,("nc","application/x-netcdf")
+ ,("nwc","application/x-nwc")
+ ,("o","application/x-object")
+ ,("oda","application/oda")
+ ,("odb","application/vnd.oasis.opendocument.database")
+ ,("odc","application/vnd.oasis.opendocument.chart")
+ ,("odf","application/vnd.oasis.opendocument.formula")
+ ,("odg","application/vnd.oasis.opendocument.graphics")
+ ,("odi","application/vnd.oasis.opendocument.image")
+ ,("odm","application/vnd.oasis.opendocument.text-master")
+ ,("odp","application/vnd.oasis.opendocument.presentation")
+ ,("ods","application/vnd.oasis.opendocument.spreadsheet")
+ ,("odt","application/vnd.oasis.opendocument.text")
+ ,("oga","audio/ogg")
+ ,("ogg","application/ogg")
+ ,("ogv","video/ogg")
+ ,("ogx","application/ogg")
+ ,("old","application/x-trash")
+ ,("otg","application/vnd.oasis.opendocument.graphics-template")
+ ,("oth","application/vnd.oasis.opendocument.text-web")
+ ,("otp","application/vnd.oasis.opendocument.presentation-template")
+ ,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
+ ,("otf","application/vnd.ms-opentype")
+ ,("ott","application/vnd.oasis.opendocument.text-template")
+ ,("oza","application/x-oz-application")
+ ,("p","text/x-pascal")
+ ,("p7r","application/x-pkcs7-certreqresp")
+ ,("pac","application/x-ns-proxy-autoconfig")
+ ,("pas","text/x-pascal")
+ ,("pat","image/x-coreldrawpattern")
+ ,("patch","text/x-diff")
+ ,("pbm","image/x-portable-bitmap")
+ ,("pcap","application/cap")
+ ,("pcf","application/x-font")
+ ,("pcf.Z","application/x-font")
+ ,("pcx","image/pcx")
+ ,("pdb","chemical/x-pdb")
+ ,("pdf","application/pdf")
+ ,("pfa","application/x-font")
+ ,("pfb","application/x-font")
+ ,("pgm","image/x-portable-graymap")
+ ,("pgn","application/x-chess-pgn")
+ ,("pgp","application/pgp-signature")
+ ,("php","application/x-httpd-php")
+ ,("php3","application/x-httpd-php3")
+ ,("php3p","application/x-httpd-php3-preprocessed")
+ ,("php4","application/x-httpd-php4")
+ ,("phps","application/x-httpd-php-source")
+ ,("pht","application/x-httpd-php")
+ ,("phtml","application/x-httpd-php")
+ ,("pk","application/x-tex-pk")
+ ,("pl","text/x-perl")
+ ,("pls","audio/x-scpls")
+ ,("pm","text/x-perl")
+ ,("png","image/png")
+ ,("pnm","image/x-portable-anymap")
+ ,("pot","text/plain")
+ ,("ppm","image/x-portable-pixmap")
+ ,("pps","application/vnd.ms-powerpoint")
+ ,("ppt","application/vnd.ms-powerpoint")
+ ,("prf","application/pics-rules")
+ ,("prt","chemical/x-ncbi-asn1-ascii")
+ ,("ps","application/postscript")
+ ,("psd","image/x-photoshop")
+ ,("py","text/x-python")
+ ,("pyc","application/x-python-code")
+ ,("pyo","application/x-python-code")
+ ,("qt","video/quicktime")
+ ,("qtl","application/x-quicktimeplayer")
+ ,("ra","audio/x-pn-realaudio")
+ ,("ram","audio/x-pn-realaudio")
+ ,("rar","application/rar")
+ ,("ras","image/x-cmu-raster")
+ ,("rd","chemical/x-mdl-rdfile")
+ ,("rdf","application/rdf+xml")
+ ,("rgb","image/x-rgb")
+ ,("rhtml","application/x-httpd-eruby")
+ ,("rm","audio/x-pn-realaudio")
+ ,("roff","application/x-troff")
+ ,("ros","chemical/x-rosdal")
+ ,("rpm","application/x-redhat-package-manager")
+ ,("rss","application/rss+xml")
+ ,("rtf","application/rtf")
+ ,("rtx","text/richtext")
+ ,("rxn","chemical/x-mdl-rxnfile")
+ ,("sct","text/scriptlet")
+ ,("sd","chemical/x-mdl-sdfile")
+ ,("sd2","audio/x-sd2")
+ ,("sda","application/vnd.stardivision.draw")
+ ,("sdc","application/vnd.stardivision.calc")
+ ,("sdd","application/vnd.stardivision.impress")
+ ,("sdf","application/vnd.stardivision.math")
+ ,("sds","application/vnd.stardivision.chart")
+ ,("sdw","application/vnd.stardivision.writer")
+ ,("ser","application/java-serialized-object")
+ ,("sgf","application/x-go-sgf")
+ ,("sgl","application/vnd.stardivision.writer-global")
+ ,("sh","application/x-sh")
+ ,("shar","application/x-shar")
+ ,("shtml","text/html")
+ ,("sid","audio/prs.sid")
+ ,("sik","application/x-trash")
+ ,("silo","model/mesh")
+ ,("sis","application/vnd.symbian.install")
+ ,("sisx","x-epoc/x-sisx-app")
+ ,("sit","application/x-stuffit")
+ ,("sitx","application/x-stuffit")
+ ,("skd","application/x-koan")
+ ,("skm","application/x-koan")
+ ,("skp","application/x-koan")
+ ,("skt","application/x-koan")
+ ,("smi","application/smil")
+ ,("smil","application/smil")
+ ,("snd","audio/basic")
+ ,("spc","chemical/x-galactic-spc")
+ ,("spl","application/futuresplash")
+ ,("spx","audio/ogg")
+ ,("src","application/x-wais-source")
+ ,("stc","application/vnd.sun.xml.calc.template")
+ ,("std","application/vnd.sun.xml.draw.template")
+ ,("sti","application/vnd.sun.xml.impress.template")
+ ,("stl","application/vnd.ms-pki.stl")
+ ,("stw","application/vnd.sun.xml.writer.template")
+ ,("sty","text/x-tex")
+ ,("sv4cpio","application/x-sv4cpio")
+ ,("sv4crc","application/x-sv4crc")
+ ,("svg","image/svg+xml")
+ -- removed for now, since it causes problems with
+ -- extensionFromMimeType: see #2183.
+ -- ,("svgz","image/svg+xml")
+ ,("sw","chemical/x-swissprot")
+ ,("swf","application/x-shockwave-flash")
+ ,("swfl","application/x-shockwave-flash")
+ ,("sxc","application/vnd.sun.xml.calc")
+ ,("sxd","application/vnd.sun.xml.draw")
+ ,("sxg","application/vnd.sun.xml.writer.global")
+ ,("sxi","application/vnd.sun.xml.impress")
+ ,("sxm","application/vnd.sun.xml.math")
+ ,("sxw","application/vnd.sun.xml.writer")
+ ,("t","application/x-troff")
+ ,("tar","application/x-tar")
+ ,("taz","application/x-gtar")
+ ,("tcl","application/x-tcl")
+ ,("tex","text/x-tex")
+ ,("texi","application/x-texinfo")
+ ,("texinfo","application/x-texinfo")
+ ,("text","text/plain")
+ ,("tgf","chemical/x-mdl-tgf")
+ ,("tgz","application/x-gtar")
+ ,("tif","image/tiff")
+ ,("tiff","image/tiff")
+ ,("tk","text/x-tcl")
+ ,("tm","text/texmacs")
+ ,("torrent","application/x-bittorrent")
+ ,("tr","application/x-troff")
+ ,("ts","text/texmacs")
+ ,("tsp","application/dsptype")
+ ,("tsv","text/tab-separated-values")
+ ,("ttf","application/x-font-truetype")
+ ,("txt","text/plain")
+ ,("udeb","application/x-debian-package")
+ ,("uls","text/iuls")
+ ,("ustar","application/x-ustar")
+ ,("val","chemical/x-ncbi-asn1-binary")
+ ,("vcd","application/x-cdlink")
+ ,("vcf","text/x-vcard")
+ ,("vcs","text/x-vcalendar")
+ ,("vmd","chemical/x-vmd")
+ ,("vms","chemical/x-vamas-iso14976")
+ ,("vrm","x-world/x-vrml")
+ ,("vrml","model/vrml")
+ ,("vs","text/plain")
+ ,("vsd","application/vnd.visio")
+ ,("vtt","text/vtt")
+ ,("wad","application/x-doom")
+ ,("wav","audio/x-wav")
+ ,("wax","audio/x-ms-wax")
+ ,("wbmp","image/vnd.wap.wbmp")
+ ,("wbxml","application/vnd.wap.wbxml")
+ ,("webm","video/webm")
+ ,("wk","application/x-123")
+ ,("wm","video/x-ms-wm")
+ ,("wma","audio/x-ms-wma")
+ ,("wmd","application/x-ms-wmd")
+ ,("wmf","image/x-wmf")
+ ,("wml","text/vnd.wap.wml")
+ ,("wmlc","application/vnd.wap.wmlc")
+ ,("wmls","text/vnd.wap.wmlscript")
+ ,("wmlsc","application/vnd.wap.wmlscriptc")
+ ,("wmv","video/x-ms-wmv")
+ ,("wmx","video/x-ms-wmx")
+ ,("wmz","application/x-ms-wmz")
+ ,("woff","application/font-woff")
+ ,("woff2","font/woff2")
+ ,("wp5","application/wordperfect5.1")
+ ,("wpd","application/wordperfect")
+ ,("wrl","model/vrml")
+ ,("wsc","text/scriptlet")
+ ,("wvx","video/x-ms-wvx")
+ ,("wz","application/x-wingz")
+ ,("xbm","image/x-xbitmap")
+ ,("xcf","application/x-xcf")
+ ,("xht","application/xhtml+xml")
+ ,("xhtml","application/xhtml+xml")
+ ,("xlb","application/vnd.ms-excel")
+ ,("xls","application/vnd.ms-excel")
+ ,("xlt","application/vnd.ms-excel")
+ ,("xml","application/xml")
+ ,("xpi","application/x-xpinstall")
+ ,("xpm","image/x-xpixmap")
+ ,("xsl","application/xml")
+ ,("xtel","chemical/x-xtel")
+ ,("xul","application/vnd.mozilla.xul+xml")
+ ,("xwd","image/x-xwindowdump")
+ ,("xyz","chemical/x-xyz")
+ ,("zip","application/zip")
+ ,("zmt","chemical/x-mopac-input")
+ ]
+
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..fe99be5fe
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,113 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MediaBag
+ Copyright : Copyright (C) 2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+ MediaBag,
+ lookupMedia,
+ insertMedia,
+ mediaDirectory,
+ extractMediaBag
+ ) where
+import System.FilePath
+import qualified System.FilePath.Posix as Posix
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Control.Monad (when)
+import Control.Monad.Trans (MonadIO(..))
+import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString))
+ deriving (Monoid, Data, Typeable)
+
+instance Show MediaBag where
+ show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe MimeType -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
+ -> MediaBag
+ -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+ MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap)
+ where mime = fromMaybe fallback mbMime
+ fallback = case takeExtension fp of
+ ".gz" -> getMimeTypeDef $ dropExtension fp
+ _ -> getMimeTypeDef fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+ -> MediaBag
+ -> Maybe (MimeType, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, MimeType, Int)]
+mediaDirectory (MediaBag mediamap) =
+ M.foldWithKey (\fp (mime,contents) ->
+ (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory. Print informational
+-- messages if 'verbose' is true.
+-- TODO: eventually we may want to put this into PandocMonad
+-- In PandocPure, it could write to the fake file system...
+extractMediaBag :: MonadIO m
+ => Bool
+ -> FilePath
+ -> MediaBag
+ -> m ()
+extractMediaBag verbose dir (MediaBag mediamap) = liftIO $ do
+ sequence_ $ M.foldWithKey
+ (\fp (_ ,contents) ->
+ ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> normalise subpath
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+ BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
new file mode 100644
index 000000000..bc62f87d0
--- /dev/null
+++ b/src/Text/Pandoc/Options.hs
@@ -0,0 +1,217 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
+{-
+Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Options
+ Copyright : Copyright (C) 2012-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Data structures and functions for representing parser and writer
+options.
+-}
+module Text.Pandoc.Options ( module Text.Pandoc.Extensions
+ , ReaderOptions(..)
+ , HTMLMathMethod (..)
+ , CiteMethod (..)
+ , ObfuscationMethod (..)
+ , HTMLSlideVariant (..)
+ , EPUBVersion (..)
+ , WrapOption (..)
+ , TopLevelDivision (..)
+ , WriterOptions (..)
+ , TrackChanges (..)
+ , ReferenceLocation (..)
+ , def
+ , isEnabled
+ ) where
+import Text.Pandoc.Extensions
+import Data.Default
+import Text.Pandoc.Highlighting (Style, pygments)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import GHC.Generics (Generic)
+
+data ReaderOptions = ReaderOptions{
+ readerExtensions :: Extensions -- ^ Syntax extensions
+ , readerStandalone :: Bool -- ^ Standalone document with header
+ , readerColumns :: Int -- ^ Number of columns in terminal
+ , readerTabStop :: Int -- ^ Tab stop
+ , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
+ , readerIndentedCodeClasses :: [String] -- ^ Default classes for
+ -- indented code blocks
+ , readerDefaultImageExtension :: String -- ^ Default extension for images
+ , readerTrackChanges :: TrackChanges
+} deriving (Show, Read, Data, Typeable, Generic)
+
+instance Default ReaderOptions
+ where def = ReaderOptions{
+ readerExtensions = emptyExtensions
+ , readerStandalone = False
+ , readerColumns = 80
+ , readerTabStop = 4
+ , readerApplyMacros = True
+ , readerIndentedCodeClasses = []
+ , readerDefaultImageExtension = ""
+ , readerTrackChanges = AcceptChanges
+ }
+
+--
+-- Writer options
+--
+
+data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic)
+
+data HTMLMathMethod = PlainMath
+ | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
+ | JsMath (Maybe String) -- url of jsMath load script
+ | GladTeX
+ | WebTeX String -- url of TeX->image script.
+ | MathML
+ | MathJax String -- url of MathJax.js
+ | KaTeX String String -- url of stylesheet and katex.js
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+data CiteMethod = Citeproc -- use citeproc to render them
+ | Natbib -- output natbib cite commands
+ | Biblatex -- output biblatex cite commands
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Methods for obfuscating email addresses in HTML.
+data ObfuscationMethod = NoObfuscation
+ | ReferenceObfuscation
+ | JavascriptObfuscation
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Varieties of HTML slide shows.
+data HTMLSlideVariant = S5Slides
+ | SlidySlides
+ | SlideousSlides
+ | DZSlides
+ | RevealJsSlides
+ | NoSlides
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Options for accepting or rejecting MS Word track-changes.
+data TrackChanges = AcceptChanges
+ | RejectChanges
+ | AllChanges
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Options for wrapping text in the output.
+data WrapOption = WrapAuto -- ^ Automatically wrap to width
+ | WrapNone -- ^ No non-semantic newlines
+ | WrapPreserve -- ^ Preserve wrapping of input source
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Options defining the type of top-level headers.
+data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts
+ | TopLevelChapter -- ^ Top-level headers become chapters
+ | TopLevelSection -- ^ Top-level headers become sections
+ | TopLevelDefault -- ^ Top-level type is determined via
+ -- heuristics
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Locations for footnotes and references in markdown output
+data ReferenceLocation = EndOfBlock -- ^ End of block
+ | EndOfSection -- ^ prior to next section header (or end of document)
+ | EndOfDocument -- ^ at end of document
+ deriving (Show, Read, Eq, Data, Typeable, Generic)
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerTemplate :: Maybe String -- ^ Template to use
+ , writerVariables :: [(String, String)] -- ^ Variables to set in template
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerIncremental :: Bool -- ^ True if lists should be incremental
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ...
+ , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , writerExtensions :: Extensions -- ^ Markdown extensions that can be used
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions
+ , writerWrapText :: WrapOption -- ^ Option for wrapping text
+ , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
+ , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
+ -- and for footnote marks in markdown
+ , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file
+ , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
+ , writerCiteMethod :: CiteMethod -- ^ How to print cites
+ , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML
+ , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
+ , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions
+ , writerListings :: Bool -- ^ Use listings package for code
+ , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting
+ -- (Nothing = no highlighting)
+ , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
+ , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB
+ , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line
+ , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
+ , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
+ , writerTOCDepth :: Int -- ^ Number of levels to include in TOC
+ , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
+ , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
+ , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
+ } deriving (Show, Data, Typeable, Generic)
+
+instance Default WriterOptions where
+ def = WriterOptions { writerTemplate = Nothing
+ , writerVariables = []
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerIncremental = False
+ , writerHTMLMathMethod = PlainMath
+ , writerNumberSections = False
+ , writerNumberOffset = [0,0,0,0,0,0]
+ , writerSectionDivs = False
+ , writerExtensions = emptyExtensions
+ , writerReferenceLinks = False
+ , writerDpi = 96
+ , writerWrapText = WrapAuto
+ , writerColumns = 72
+ , writerEmailObfuscation = NoObfuscation
+ , writerIdentifierPrefix = ""
+ , writerSourceURL = Nothing
+ , writerUserDataDir = Nothing
+ , writerCiteMethod = Citeproc
+ , writerHtmlQTags = False
+ , writerSlideLevel = Nothing
+ , writerTopLevelDivision = TopLevelDefault
+ , writerListings = False
+ , writerHighlightStyle = Just pygments
+ , writerSetextHeaders = True
+ , writerEpubMetadata = Nothing
+ , writerEpubStylesheet = Nothing
+ , writerEpubFonts = []
+ , writerEpubChapterLevel = 1
+ , writerTOCDepth = 3
+ , writerReferenceDoc = Nothing
+ , writerLaTeXArgs = []
+ , writerReferenceLocation = EndOfDocument
+ }
+
+-- | Returns True if the given extension is enabled.
+isEnabled :: Extension -> WriterOptions -> Bool
+isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts)
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
new file mode 100644
index 000000000..1b3b4eb88
--- /dev/null
+++ b/src/Text/Pandoc/PDF.hs
@@ -0,0 +1,369 @@
+{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-}
+{-
+Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.PDF
+ Copyright : Copyright (C) 2012-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX documents to PDF.
+-}
+module Text.Pandoc.PDF ( makePDF ) where
+
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy.Char8 as BC
+import qualified Data.ByteString as BS
+import Data.Monoid ((<>))
+import System.Exit (ExitCode (..))
+import System.FilePath
+import System.IO (stdout)
+import System.IO.Temp (withTempFile)
+import System.Directory
+import Data.Digest.Pure.SHA (showDigest, sha1)
+import System.Environment
+import Control.Monad (unless, when, (<=<))
+import qualified Control.Exception as E
+import Data.List (isInfixOf)
+import Data.Maybe (fromMaybe)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Definition
+import Text.Pandoc.MediaBag
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Shared (warn, withTempDir, inDirectory, stringify)
+import Text.Pandoc.Writers.Shared (getField, metaToJSON)
+import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
+import Text.Pandoc.Logging (Verbosity(..))
+import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
+import Text.Pandoc.Process (pipeProcess)
+import Control.Monad.Trans (MonadIO(..))
+import qualified Data.ByteString.Lazy as BL
+import qualified Codec.Picture as JP
+#ifdef _WINDOWS
+import Data.List (intercalate)
+#endif
+import Text.Pandoc.Class (PandocIO, runIOorExplode, fetchItem, setMediaBag, runIO)
+
+#ifdef _WINDOWS
+changePathSeparators :: FilePath -> FilePath
+changePathSeparators = intercalate "/" . splitDirectories
+#endif
+
+makePDF :: MonadIO m
+ => String -- ^ pdf creator (pdflatex, lualatex,
+ -- xelatex, context, wkhtmltopdf)
+ -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer
+ -> WriterOptions -- ^ options
+ -> Verbosity -- ^ verbosity level
+ -> MediaBag -- ^ media
+ -> Pandoc -- ^ document
+ -> m (Either ByteString ByteString)
+makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do
+ let mathArgs = case writerHTMLMathMethod opts of
+ -- with MathJax, wait til all math is rendered:
+ MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
+ "--window-status", "mathjax_loaded"]
+ _ -> []
+ meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
+ let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
+ let args = mathArgs ++
+ concatMap toArgs
+ [("page-size", getField "papersize" meta')
+ ,("title", getField "title" meta')
+ ,("margin-bottom", fromMaybe (Just "1.2in")
+ (getField "margin-bottom" meta'))
+ ,("margin-top", fromMaybe (Just "1.25in")
+ (getField "margin-top" meta'))
+ ,("margin-right", fromMaybe (Just "1.25in")
+ (getField "margin-right" meta'))
+ ,("margin-left", fromMaybe (Just "1.25in")
+ (getField "margin-left" meta'))
+ ]
+ source <- runIOorExplode $ writer opts doc
+ html2pdf verbosity args source
+makePDF program writer opts verbosity mediabag doc =
+ liftIO $ withTempDir "tex2pdf." $ \tmpdir -> do
+ doc' <- handleImages opts mediabag tmpdir doc
+ source <- runIOorExplode $ writer opts doc'
+ let args = writerLaTeXArgs opts
+ case takeBaseName program of
+ "context" -> context2pdf verbosity tmpdir source
+ prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
+ -> tex2pdf' verbosity args tmpdir program source
+ _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
+
+handleImages :: WriterOptions
+ -> MediaBag
+ -> FilePath -- ^ temp dir to store images
+ -> Pandoc -- ^ document
+ -> IO Pandoc
+handleImages opts mediabag tmpdir =
+ walkM (convertImages tmpdir) <=< walkM (handleImage' opts mediabag tmpdir)
+
+handleImage' :: WriterOptions
+ -> MediaBag
+ -> FilePath
+ -> Inline
+ -> IO Inline
+handleImage' opts mediabag tmpdir (Image attr ils (src,tit)) = do
+ exists <- doesFileExist src
+ if exists
+ then return $ Image attr ils (src,tit)
+ else do
+ res <- runIO $ do
+ setMediaBag mediabag
+ fetchItem (writerSourceURL opts) src
+ case res of
+ Right (contents, Just mime) -> do
+ let ext = fromMaybe (takeExtension src) $
+ extensionFromMimeType mime
+ let basename = showDigest $ sha1 $ BL.fromChunks [contents]
+ let fname = tmpdir </> basename <.> ext
+ BS.writeFile fname contents
+ return $ Image attr ils (fname,tit)
+ _ -> do
+ warn $ "Could not find image `" ++ src ++ "', skipping..."
+ -- return alt text
+ return $ Emph ils
+handleImage' _ _ _ x = return x
+
+convertImages :: FilePath -> Inline -> IO Inline
+convertImages tmpdir (Image attr ils (src, tit)) = do
+ img <- convertImage tmpdir src
+ newPath <-
+ case img of
+ Left e -> src <$ warn e
+ Right fp -> return fp
+ return (Image attr ils (newPath, tit))
+convertImages _ x = return x
+
+-- Convert formats which do not work well in pdf to png
+convertImage :: FilePath -> FilePath -> IO (Either String FilePath)
+convertImage tmpdir fname =
+ case mime of
+ Just "image/png" -> doNothing
+ Just "image/jpeg" -> doNothing
+ Just "application/pdf" -> doNothing
+ _ -> JP.readImage fname >>= \res ->
+ case res of
+ Left _ -> return $ Left $ "Unable to convert `" ++
+ fname ++ "' for use with pdflatex."
+ Right img ->
+ E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
+ \(e :: E.SomeException) -> return (Left (show e))
+ where
+ fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir
+ mime = getMimeType fname
+ doNothing = return (Right fname)
+
+tex2pdf' :: Verbosity -- ^ Verbosity level
+ -> [String] -- ^ Arguments to the latex-engine
+ -> FilePath -- ^ temp directory for output
+ -> String -- ^ tex program
+ -> String -- ^ tex source
+ -> IO (Either ByteString ByteString)
+tex2pdf' verbosity args tmpDir program source = do
+ let numruns = if "\\tableofcontents" `isInfixOf` source
+ then 3 -- to get page numbers
+ else 2 -- 1 run won't give you PDF bookmarks
+ (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source
+ case (exit, mbPdf) of
+ (ExitFailure _, _) -> do
+ let logmsg = extractMsg log'
+ let extramsg =
+ case logmsg of
+ x | "! Package inputenc Error" `BC.isPrefixOf` x
+ && program /= "xelatex"
+ -> "\nTry running pandoc with --latex-engine=xelatex."
+ _ -> ""
+ return $ Left $ logmsg <> extramsg
+ (ExitSuccess, Nothing) -> return $ Left ""
+ (ExitSuccess, Just pdf) -> return $ Right pdf
+
+-- parsing output
+
+extractMsg :: ByteString -> ByteString
+extractMsg log' = do
+ let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
+ let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
+ let lineno = take 1 rest
+ if null msg'
+ then log'
+ else BC.unlines (msg'' ++ lineno)
+
+extractConTeXtMsg :: ByteString -> ByteString
+extractConTeXtMsg log' = do
+ let msg' = take 1 $
+ dropWhile (not . ("tex error" `BC.isPrefixOf`)) $ BC.lines log'
+ if null msg'
+ then log'
+ else BC.unlines msg'
+
+-- running tex programs
+
+-- Run a TeX program on an input bytestring and return (exit code,
+-- contents of stdout, contents of produced PDF if any). Rerun
+-- a fixed number of times to resolve references.
+runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath
+ -> String -> IO (ExitCode, ByteString, Maybe ByteString)
+runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
+ let file = tmpDir </> "input.tex"
+ exists <- doesFileExist file
+ unless exists $ UTF8.writeFile file source
+#ifdef _WINDOWS
+ -- note: we want / even on Windows, for TexLive
+ let tmpDir' = changePathSeparators tmpDir
+ let file' = changePathSeparators file
+#else
+ let tmpDir' = tmpDir
+ let file' = file
+#endif
+ let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
+ "-output-directory", tmpDir'] ++ args ++ [file']
+ env' <- getEnvironment
+ let sep = [searchPathSeparator]
+ let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
+ $ lookup "TEXINPUTS" env'
+ let env'' = ("TEXINPUTS", texinputs) :
+ [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
+ when (verbosity >= INFO && runNumber == 1) $ do
+ putStrLn "[makePDF] temp dir:"
+ putStrLn tmpDir'
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env''
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
+ B.readFile file' >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty
+ when (verbosity >= INFO) $ do
+ putStrLn $ "[makePDF] Run #" ++ show runNumber
+ B.hPutStr stdout out
+ putStr "\n"
+ if runNumber <= numRuns
+ then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
+ else do
+ let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
+ pdfExists <- doesFileExist pdfFile
+ pdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ else return Nothing
+ return (exit, out, pdf)
+
+html2pdf :: Verbosity -- ^ Verbosity level
+ -> [String] -- ^ Args to wkhtmltopdf
+ -> String -- ^ HTML5 source
+ -> IO (Either ByteString ByteString)
+html2pdf verbosity args source = do
+ file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
+ pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
+ UTF8.writeFile file source
+ let programArgs = args ++ [file, pdfFile]
+ env' <- getEnvironment
+ when (verbosity >= INFO) $ do
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env'
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
+ B.readFile file >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty
+ removeFile file
+ when (verbosity >= INFO) $ do
+ B.hPutStr stdout out
+ putStr "\n"
+ pdfExists <- doesFileExist pdfFile
+ mbPdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then do
+ res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ removeFile pdfFile
+ return res
+ else return Nothing
+ return $ case (exit, mbPdf) of
+ (ExitFailure _, _) -> Left out
+ (ExitSuccess, Nothing) -> Left ""
+ (ExitSuccess, Just pdf) -> Right pdf
+
+context2pdf :: Verbosity -- ^ Verbosity level
+ -> FilePath -- ^ temp directory for output
+ -> String -- ^ ConTeXt source
+ -> IO (Either ByteString ByteString)
+context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
+ let file = "input.tex"
+ UTF8.writeFile file source
+#ifdef _WINDOWS
+ -- note: we want / even on Windows, for TexLive
+ let tmpDir' = changePathSeparators tmpDir
+#else
+ let tmpDir' = tmpDir
+#endif
+ let programArgs = "--batchmode" : [file]
+ env' <- getEnvironment
+ let sep = [searchPathSeparator]
+ let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++)
+ $ lookup "TEXINPUTS" env'
+ let env'' = ("TEXINPUTS", texinputs) :
+ [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
+ when (verbosity >= INFO) $ do
+ putStrLn "[makePDF] temp dir:"
+ putStrLn tmpDir'
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "context" ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env''
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
+ B.readFile file >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env'') "context" programArgs BL.empty
+ when (verbosity >= INFO) $ do
+ B.hPutStr stdout out
+ putStr "\n"
+ let pdfFile = replaceExtension file ".pdf"
+ pdfExists <- doesFileExist pdfFile
+ mbPdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ else return Nothing
+ case (exit, mbPdf) of
+ (ExitFailure _, _) -> do
+ let logmsg = extractConTeXtMsg out
+ return $ Left logmsg
+ (ExitSuccess, Nothing) -> return $ Left ""
+ (ExitSuccess, Just pdf) -> return $ Right pdf
+
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
new file mode 100644
index 000000000..400d07f2a
--- /dev/null
+++ b/src/Text/Pandoc/Parsing.hs
@@ -0,0 +1,1329 @@
+{-# LANGUAGE
+ FlexibleContexts
+, GeneralizedNewtypeDeriving
+, TypeSynonymInstances
+, MultiParamTypeClasses
+, FlexibleInstances
+, IncoherentInstances #-}
+
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Parsing
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A utility library with parsers used in pandoc readers.
+-}
+module Text.Pandoc.Parsing ( anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ oneOfStringsCI,
+ spaceChar,
+ nonspaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ romanNumeral,
+ emailAddress,
+ uri,
+ mathInline,
+ mathDisplay,
+ withHorizDisplacement,
+ withRaw,
+ escaped,
+ characterReference,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ lineBlockLines,
+ tableWith,
+ widthsFromIndices,
+ gridTableWith,
+ readWith,
+ readWithM,
+ testStringWith,
+ guardEnabled,
+ guardDisabled,
+ updateLastStrPos,
+ notAfterString,
+ logMessage,
+ reportLogMessages,
+ ParserState (..),
+ HasReaderOptions (..),
+ HasHeaderMap (..),
+ HasIdentifierList (..),
+ HasMacros (..),
+ HasLogMessages (..),
+ HasLastStrPosition (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ HasQuoteContext (..),
+ NoteTable,
+ NoteTable',
+ KeyTable,
+ SubstTable,
+ Key (..),
+ toKey,
+ registerHeader,
+ smartPunctuation,
+ singleQuoteStart,
+ singleQuoteEnd,
+ doubleQuoteStart,
+ doubleQuoteEnd,
+ ellipses,
+ apostrophe,
+ dash,
+ nested,
+ citeKey,
+ macro,
+ applyMacros',
+ Parser,
+ ParserT,
+ F(..),
+ runF,
+ askF,
+ asksF,
+ token,
+ (<+?>),
+ extractIdClass,
+ insertIncludedFile,
+ -- * Re-exports from Text.Pandoc.Parsec
+ Stream,
+ runParser,
+ runParserT,
+ parse,
+ anyToken,
+ getInput,
+ setInput,
+ unexpected,
+ char,
+ letter,
+ digit,
+ alphaNum,
+ skipMany,
+ skipMany1,
+ spaces,
+ space,
+ anyChar,
+ satisfy,
+ newline,
+ string,
+ count,
+ eof,
+ noneOf,
+ oneOf,
+ lookAhead,
+ notFollowedBy,
+ many,
+ many1,
+ manyTill,
+ (<|>),
+ (<?>),
+ choice,
+ try,
+ sepBy,
+ sepBy1,
+ sepEndBy,
+ sepEndBy1,
+ endBy,
+ endBy1,
+ option,
+ optional,
+ optionMaybe,
+ getState,
+ setState,
+ updateState,
+ SourcePos,
+ getPosition,
+ setPosition,
+ sourceColumn,
+ sourceLine,
+ setSourceColumn,
+ setSourceLine,
+ newPos
+ )
+where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.XML (fromEntities)
+import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
+import Text.Parsec hiding (token)
+import Text.Parsec.Pos (newPos)
+import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
+ isHexDigit, isSpace, isPunctuation )
+import Data.List ( intercalate, transpose, isSuffixOf )
+import Text.Pandoc.Shared
+import qualified Data.Map as M
+import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
+ parseMacroDefinitions)
+import Text.HTML.TagSoup.Entity ( lookupEntity )
+import Text.Pandoc.Asciify (toAsciiChar)
+import Data.Monoid ((<>))
+import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report)
+import Text.Pandoc.Logging
+import Data.Default
+import qualified Data.Set as Set
+import Control.Monad.Reader
+import Control.Monad.Identity
+import Data.Maybe (catMaybes)
+
+import Text.Pandoc.Error
+import Control.Monad.Except
+
+type Parser t s = Parsec t s
+
+type ParserT = ParsecT
+
+newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
+
+runF :: F a -> ParserState -> a
+runF = runReader . unF
+
+askF :: F ParserState
+askF = F ask
+
+asksF :: (ParserState -> a) -> F a
+asksF f = F $ asks f
+
+instance Monoid a => Monoid (F a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = liftM mconcat . sequence
+
+-- | Parse any line of text
+anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
+anyLine = do
+ -- This is much faster than:
+ -- manyTill anyChar newline
+ inp <- getInput
+ pos <- getPosition
+ case break (=='\n') inp of
+ (this, '\n':rest) -> do
+ -- needed to persuade parsec that this won't match an empty string:
+ anyChar
+ setInput rest
+ setPosition $ incSourceLine (setSourceColumn pos 1) 1
+ return this
+ _ -> mzero
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: Stream s m t
+ => ParserT s st m a
+ -> ParserT s st m end
+ -> ParserT s st m [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings' _ [] = fail "no strings"
+oneOfStrings' matches strs = try $ do
+ c <- anyChar
+ let strs' = [xs | (x:xs) <- strs, x `matches` c]
+ case strs' of
+ [] -> fail "not found"
+ _ -> (c:) <$> oneOfStrings' matches strs'
+ <|> if "" `elem` strs'
+ then return [c]
+ else fail "not found"
+
+-- | Parses one of a list of strings. If the list contains
+-- two strings one of which is a prefix of the other, the longer
+-- string will be matched if possible.
+oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
+oneOfStrings = oneOfStrings' (==)
+
+-- | Parses one of a list of strings (tried in order), case insensitive.
+oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
+oneOfStringsCI = oneOfStrings' ciMatch
+ where ciMatch x y = toLower' x == toLower' y
+ -- this optimizes toLower by checking common ASCII case
+ -- first, before calling the expensive unicode-aware
+ -- function:
+ toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
+ | isAscii c = c
+ | otherwise = toLower c
+
+-- | Parses a space or tab.
+spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
+
+-- | Parses a nonspace, nonnewline character.
+nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: Stream s m Char => ParserT s st m Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: Stream s m Char => ParserT s st m [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
+ -> ParserT s st m end -- ^ end parser
+ -> ParserT s st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT s st m [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ spaces
+ eof
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: Stream [Char] m Char => ParserT [Char] st m String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
+-- and return "hello (there)".
+charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+ -> ParserT s st m String
+charsInBalanced open close parser = try $ do
+ char open
+ let isDelim c = c == open || c == close
+ raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser)
+ <|> (do res <- charsInBalanced open close parser
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- old charsInBalanced would be:
+-- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
+-- old charsInBalanced' would be:
+-- charsInBalanced open close anyChar
+
+-- Auxiliary functions for romanNumeral:
+
+lowercaseRomanDigits :: [Char]
+lowercaseRomanDigits = ['i','v','x','l','c','d','m']
+
+uppercaseRomanDigits :: [Char]
+uppercaseRomanDigits = map toUpper lowercaseRomanDigits
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
+ -> ParserT s st m Int
+romanNumeral upperCase = do
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
+ else lowercaseRomanDigits
+ lookAhead $ oneOf romanDigits
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ map char romanDigits
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- Parsers for email addresses and URIs
+
+-- | Parses an email address; returns original and corresponding
+-- escaped mailto: URI.
+emailAddress :: Stream s m Char => ParserT s st m (String, String)
+emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
+ where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
+ in (full, escapeURI $ "mailto:" ++ full)
+ mailbox = intercalate "." <$> (emailWord `sepby1` dot)
+ domain = intercalate "." <$> (subdomain `sepby1` dot)
+ dot = char '.'
+ subdomain = many1 $ alphaNum <|> innerPunct
+ -- this excludes some valid email addresses, since an
+ -- email could contain e.g. '__', but gives better results
+ -- for our purposes, when combined with markdown parsing:
+ innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
+ <* notFollowedBy space
+ <* notFollowedBy (satisfy isPunctuation))
+ -- technically an email address could begin with a symbol,
+ -- but allowing this creates too many problems.
+ -- See e.g. https://github.com/jgm/pandoc/issues/2940
+ emailWord = do x <- satisfy isAlphaNum
+ xs <- many (satisfy isEmailChar)
+ return (x:xs)
+ isEmailChar c = isAlphaNum c || isEmailPunct c
+ isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
+ -- note: sepBy1 from parsec consumes input when sep
+ -- succeeds and p fails, so we use this variant here.
+ sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
+
+
+-- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
+-- the unofficial schemes coap, doi, javascript, isbn, pmid
+schemes :: [String]
+schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
+ "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
+ "h323","http","https","iax","icap","im","imap","info","ipp","iris",
+ "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
+ "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
+ "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
+ "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
+ "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
+ "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
+ "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
+ "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
+ "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
+ "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
+ "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
+ "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
+ "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
+ "platform","proxy","psyc","query","res","resource","rmi","rsync",
+ "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
+ "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
+ "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
+ "ymsgr", "isbn", "pmid"]
+
+uriScheme :: Stream s m Char => ParserT s st m String
+uriScheme = oneOfStringsCI schemes
+
+-- | Parses a URI. Returns pair of original and URI-escaped version.
+uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
+uri = try $ do
+ scheme <- uriScheme
+ char ':'
+ -- We allow sentence punctuation except at the end, since
+ -- we don't want the trailing '.' in 'http://google.com.' We want to allow
+ -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
+ -- as a URL, while NOT picking up the closing paren in
+ -- (http://wikipedia.org). So we include balanced parens in the URL.
+ let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
+ let wordChar = satisfy isWordChar
+ let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
+ let entity = () <$ characterReference
+ let punct = skipMany1 (char ',')
+ <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
+ let uriChunk = skipMany1 wordChar
+ <|> percentEscaped
+ <|> entity
+ <|> (try $ punct >>
+ lookAhead (void (satisfy isWordChar) <|> percentEscaped))
+ str <- snd <$> withRaw (skipMany1 ( () <$
+ (enclosed (char '(') (char ')') uriChunk
+ <|> enclosed (char '{') (char '}') uriChunk
+ <|> enclosed (char '[') (char ']') uriChunk)
+ <|> uriChunk))
+ str' <- option str $ char '/' >> return (str ++ "/")
+ let uri' = scheme ++ ":" ++ fromEntities str'
+ return (uri', escapeURI uri')
+
+mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
+mathInlineWith op cl = try $ do
+ string op
+ notFollowedBy space
+ words' <- many1Till (count 1 (noneOf " \t\n\\")
+ <|> (char '\\' >>
+ -- This next clause is needed because \text{..} can
+ -- contain $, \(\), etc.
+ (try (string "text" >>
+ (("\\text" ++) <$> inBalancedBraces 0 ""))
+ <|> (\c -> ['\\',c]) <$> anyChar))
+ <|> do (blankline <* notFollowedBy' blankline) <|>
+ (oneOf " \t" <* skipMany (oneOf " \t"))
+ notFollowedBy (char '$')
+ return " "
+ ) (try $ string cl)
+ notFollowedBy digit -- to prevent capture of $5
+ return $ concat words'
+ where
+ inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
+ inBalancedBraces 0 "" = do
+ c <- anyChar
+ if c == '{'
+ then inBalancedBraces 1 "{"
+ else mzero
+ inBalancedBraces 0 s = return $ reverse s
+ inBalancedBraces numOpen ('\\':xs) = do
+ c <- anyChar
+ inBalancedBraces numOpen (c:'\\':xs)
+ inBalancedBraces numOpen xs = do
+ c <- anyChar
+ case c of
+ '}' -> inBalancedBraces (numOpen - 1) (c:xs)
+ '{' -> inBalancedBraces (numOpen + 1) (c:xs)
+ _ -> inBalancedBraces numOpen (c:xs)
+
+mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
+mathDisplayWith op cl = try $ do
+ string op
+ many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
+
+mathDisplay :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m String
+mathDisplay =
+ (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathDisplayWith "\\[" "\\]")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathDisplayWith "\\\\[" "\\\\]")
+
+mathInline :: (HasReaderOptions st , Stream s m Char)
+ => ParserT s st m String
+mathInline =
+ (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathInlineWith "\\(" "\\)")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathInlineWith "\\\\(" "\\\\)")
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: Stream s m Char
+ => ParserT s st m a -- ^ Parser to apply
+ -> ParserT s st m (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Applies a parser and returns the raw string that was parsed,
+-- along with the value produced by the parser.
+withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
+withRaw parser = do
+ pos1 <- getPosition
+ inp <- getInput
+ result <- parser
+ pos2 <- getPosition
+ let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
+ let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
+ let inplines = take ((l2 - l1) + 1) $ lines inp
+ let raw = case inplines of
+ [] -> ""
+ [l] -> take (c2 - c1) l
+ ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
+ return (result, raw)
+
+-- | Parses backslash, then applies character parser.
+escaped :: Stream s m Char
+ => ParserT s st m Char -- ^ Parser for character to escape
+ -> ParserT s st m Char
+escaped parser = try $ char '\\' >> parser
+
+-- | Parse character entity.
+characterReference :: Stream s m Char => ParserT s st m Char
+characterReference = try $ do
+ char '&'
+ ent <- many1Till nonspaceChar (char ';')
+ let ent' = case ent of
+ '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug
+ '#':_ -> ent
+ _ -> ent ++ ";"
+ case lookupEntity ent' of
+ Just (c : _) -> return c
+ _ -> fail "entity not found"
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '@' and optional label and
+-- returns (DefaultStyle, [next example number]). The next
+-- example number is incremented in parser state, and the label
+-- (if present) is added to the label table.
+exampleNum :: Stream s m Char
+ => ParserT s ParserState m (ListNumberStyle, Int)
+exampleNum = do
+ char '@'
+ lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
+ st <- getState
+ let num = stateNextExample st
+ let newlabels = if null lab
+ then stateExamples st
+ else M.insert lab num $ stateExamples st
+ updateState $ \s -> s{ stateNextExample = num + 1
+ , stateExamples = newlabels }
+ return (Example, num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, exampleNum, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: Stream s m Char
+ => ListNumberStyle
+ -> ListNumberDelim
+ -> ParserT s ParserState m Int
+orderedListMarker style delim = do
+ let num = defaultNum <|> -- # can continue any kind of list
+ case style of
+ DefaultStyle -> decimal
+ Example -> exampleNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: Stream s m Char => ParserT s st m Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
+lineBlockLine = try $ do
+ char '|'
+ char ' '
+ white <- many (spaceChar >> return '\160')
+ notFollowedBy newline
+ line <- anyLine
+ continuations <- many (try $ char ' ' >> anyLine)
+ return $ white ++ unwords (line : continuations)
+
+blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char
+blankLineBlockLine = try (char '|' >> blankline)
+
+-- | Parses an RST-style line block and returns a list of strings.
+lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
+lineBlockLines = try $ do
+ lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine))
+ skipMany1 $ blankline <|> blankLineBlockLine
+ return lines'
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'.
+tableWith :: Stream s m Char
+ => ParserT s ParserState m ([Blocks], [Alignment], [Int])
+ -> ([Int] -> ParserT s ParserState m [Blocks])
+ -> ParserT s ParserState m sep
+ -> ParserT s ParserState m end
+ -> ParserT s ParserState m Blocks
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (heads, aligns, indices) <- headerParser
+ lines' <- rowParser indices `sepEndBy1` lineParser
+ footerParser
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ B.table mempty (zip aligns widths) heads lines'
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns' indices =
+ let numColumns = max numColumns' (if null indices then 0 else last indices)
+ lengths' = zipWith (-) indices (0:indices)
+ lengths = reverse $
+ case reverse lengths' of
+ [] -> []
+ [x] -> [x]
+ -- compensate for the fact that intercolumn
+ -- spaces are counted in widths of all columns
+ -- but the last...
+ (x:y:zs) -> if x < y && y - x <= 2
+ then y:y:zs
+ else x:y:zs
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+---
+
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+gridTableWith :: Stream [Char] m Char
+ => ParserT [Char] ParserState m Blocks -- ^ Block list parser
+ -> Bool -- ^ Headerless table
+ -> ParserT [Char] ParserState m Blocks
+gridTableWith blocks headless =
+ tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
+ (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitStringByIndices (init indices) $ trimr line
+
+gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
+
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
+
+removeFinalBar :: String -> String
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Stream [Char] m Char
+ => Bool -- ^ Headerless table
+ -> ParserT [Char] ParserState m Blocks
+ -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int])
+gridTableHeader headless blocks = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- mapM (parseFromString blocks) $ map trim rawHeads
+ return (heads, aligns, indices)
+
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices line)
+
+-- | Parse row of grid table.
+gridTableRow :: Stream [Char] m Char
+ => ParserT [Char] ParserState m Blocks
+ -> [Int]
+ -> ParserT [Char] ParserState m [Blocks]
+gridTableRow blocks indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ mapM (liftM compactifyCell . parseFromString blocks) cols
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+compactifyCell :: Blocks -> Blocks
+compactifyCell bs = head $ compactify [bs]
+
+-- | Parse footer for a grid table.
+gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
+gridTableFooter = blanklines
+
+---
+
+-- | Removes the ParsecT layer from the monad transformer stack
+readWithM :: (Monad m)
+ => ParserT [Char] st m a -- ^ parser
+ -> st -- ^ initial state
+ -> String -- ^ input
+ -> m (Either PandocError a)
+readWithM parser state input =
+ mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input
+
+
+-- | Parse a string with a given parser and state
+readWith :: Parser [Char] st a
+ -> st
+ -> String
+ -> Either PandocError a
+readWith p t inp = runIdentity $ readWithM p t inp
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a)
+ => ParserT [Char] ParserState Identity a
+ -> [Char]
+ -> IO ()
+testStringWith parser str = UTF8.putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateOptions :: ReaderOptions, -- ^ User options
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateAllowLinks :: Bool, -- ^ Allow parsing of links
+ stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
+ stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys
+ stateSubstitutions :: SubstTable, -- ^ List of substitution references
+ stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
+ stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
+ stateMeta :: Meta, -- ^ Document metadata
+ stateMeta' :: F Meta, -- ^ Document metadata
+ stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+ stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
+ stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
+ stateNextExample :: Int, -- ^ Number of next example
+ stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
+ stateHasChapters :: Bool, -- ^ True if \chapter encountered
+ stateMacros :: [Macro], -- ^ List of macros defined so far
+ stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
+ stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
+ -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
+ -- roles), 3) Additional classes (rest of Attr is unused)).
+ stateCaption :: Maybe Inlines, -- ^ Caption in current environment
+ stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
+ stateContainers :: [String], -- ^ parent include files
+ stateLogMessages :: [LogMessage], -- ^ log messages
+ stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context
+ }
+
+instance Default ParserState where
+ def = defaultParserState
+
+instance HasMeta ParserState where
+ setMeta field val st =
+ st{ stateMeta = setMeta field val $ stateMeta st }
+ deleteMeta field st =
+ st{ stateMeta = deleteMeta field $ stateMeta st }
+
+class HasReaderOptions st where
+ extractReaderOptions :: st -> ReaderOptions
+ getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
+ -- default
+ getOption f = (f . extractReaderOptions) <$> getState
+
+class HasQuoteContext st m where
+ getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+ withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+instance Monad m => HasQuoteContext ParserState m where
+ getQuoteContext = stateQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+instance HasReaderOptions ParserState where
+ extractReaderOptions = stateOptions
+
+class HasHeaderMap st where
+ extractHeaderMap :: st -> M.Map Inlines String
+ updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
+ st -> st
+
+instance HasHeaderMap ParserState where
+ extractHeaderMap = stateHeaders
+ updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
+
+class HasIdentifierList st where
+ extractIdentifierList :: st -> Set.Set String
+ updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st
+
+instance HasIdentifierList ParserState where
+ extractIdentifierList = stateIdentifiers
+ updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
+
+class HasMacros st where
+ extractMacros :: st -> [Macro]
+ updateMacros :: ([Macro] -> [Macro]) -> st -> st
+
+instance HasMacros ParserState where
+ extractMacros = stateMacros
+ updateMacros f st = st{ stateMacros = f $ stateMacros st }
+
+class HasLastStrPosition st where
+ setLastStrPos :: SourcePos -> st -> st
+ getLastStrPos :: st -> Maybe SourcePos
+
+instance HasLastStrPosition ParserState where
+ setLastStrPos pos st = st{ stateLastStrPos = Just pos }
+ getLastStrPos st = stateLastStrPos st
+
+class HasLogMessages st where
+ addLogMessage :: LogMessage -> st -> st
+ getLogMessages :: st -> [LogMessage]
+
+instance HasLogMessages ParserState where
+ addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
+ getLogMessages st = reverse $ stateLogMessages st
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateOptions = def,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateAllowLinks = True,
+ stateMaxNestingLevel = 6,
+ stateLastStrPos = Nothing,
+ stateKeys = M.empty,
+ stateHeaderKeys = M.empty,
+ stateSubstitutions = M.empty,
+ stateNotes = [],
+ stateNotes' = [],
+ stateMeta = nullMeta,
+ stateMeta' = return nullMeta,
+ stateHeaderTable = [],
+ stateHeaders = M.empty,
+ stateIdentifiers = Set.empty,
+ stateNextExample = 1,
+ stateExamples = M.empty,
+ stateHasChapters = False,
+ stateMacros = [],
+ stateRstDefaultRole = "title-reference",
+ stateRstCustomRoles = M.empty,
+ stateCaption = Nothing,
+ stateInHtmlBlock = Nothing,
+ stateContainers = [],
+ stateLogMessages = [],
+ stateMarkdownAttribute = False
+ }
+
+-- | Add a log message.
+logMessage :: (Stream s m a, HasLogMessages st)
+ => LogMessage -> ParserT s st m ()
+logMessage msg = updateState (addLogMessage msg)
+
+-- | Report all the accumulated log messages, according to verbosity level.
+reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
+reportLogMessages = do
+ msgs <- getLogMessages <$> getState
+ mapM_ report msgs
+
+-- | Succeed only if the extension is enabled.
+guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
+guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext
+
+-- | Succeed only if the extension is disabled.
+guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
+guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext
+
+-- | Update the position on which the last string ended.
+updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos
+
+-- | Whether we are right after the end of a string.
+notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
+notAfterString = do
+ pos <- getPosition
+ st <- getState
+ return $ getLastStrPos st /= Just pos
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, String)]
+
+type NoteTable' = [(String, F Blocks)] -- used in markdown reader
+
+newtype Key = Key String deriving (Show, Read, Eq, Ord)
+
+toKey :: String -> Key
+toKey = Key . map toLower . unwords . words . unbracket
+ where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs
+ unbracket xs = xs
+
+type KeyTable = M.Map Key (Target, Attr)
+
+type SubstTable = M.Map Key Inlines
+
+-- | Add header to the list of headers in state, together
+-- with its associated identifier. If the identifier is null
+-- and the auto_identifers extension is set, generate a new
+-- unique identifier, and update the list of identifiers
+-- in state.
+registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+ => Attr -> Inlines -> ParserT s st m Attr
+registerHeader (ident,classes,kvs) header' = do
+ ids <- extractIdentifierList <$> getState
+ exts <- getOption readerExtensions
+ let insert' = M.insertWith (\_new old -> old)
+ if null ident && Ext_auto_identifiers `extensionEnabled` exts
+ then do
+ let id' = uniqueIdent (B.toList header') ids
+ let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
+ then catMaybes $ map toAsciiChar id'
+ else id'
+ updateState $ updateIdentifierList $ Set.insert id'
+ updateState $ updateIdentifierList $ Set.insert id''
+ updateState $ updateHeaderMap $ insert' header' id'
+ return (id'',classes,kvs)
+ else do
+ unless (null ident) $
+ updateState $ updateHeaderMap $ insert' header' ident
+ return (ident,classes,kvs)
+
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
+smartPunctuation inlineParser = do
+ guardEnabled Ext_smart
+ choice [ quoted inlineParser, apostrophe, dash, ellipses ]
+
+apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
+
+quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
+quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
+
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
+singleQuoted inlineParser = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
+ return . B.singleQuoted . mconcat
+
+doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
+doubleQuoted inlineParser = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
+ return . B.doubleQuoted . mconcat
+
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
+ => QuoteContext
+ -> ParserT s st m ()
+failIfInQuoteContext context = do
+ context' <- getQuoteContext
+ if context' == context
+ then fail "already inside quotes"
+ else return ()
+
+charOrRef :: Stream s m Char => String -> ParserT s st m Char
+charOrRef cs =
+ oneOf cs <|> try (do c <- characterReference
+ guard (c `elem` cs)
+ return c)
+
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ -- single quote start can't be right after str
+ guard =<< notAfterString
+ () <$ charOrRef "'\8216\145"
+
+singleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
+singleQuoteEnd = try $ do
+ charOrRef "'\8217\146"
+ notFollowedBy alphaNum
+
+doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
+doubleQuoteStart = do
+ failIfInQuoteContext InDoubleQuote
+ try $ do charOrRef "\"\8220\147"
+ notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
+
+doubleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
+doubleQuoteEnd = void (charOrRef "\"\8221\148")
+
+ellipses :: Stream s m Char
+ => ParserT s st m Inlines
+ellipses = try (string "..." >> return (B.str "\8230"))
+
+dash :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m Inlines
+dash = try $ do
+ oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
+ if oldDashes
+ then do
+ char '-'
+ (char '-' >> return (B.str "\8212"))
+ <|> (lookAhead digit >> return (B.str "\8211"))
+ else do
+ string "--"
+ (char '-' >> return (B.str "\8212"))
+ <|> return (B.str "\8211")
+
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: Stream s m a
+ => ParserT s ParserState m a
+ -> ParserT s ParserState m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+citeKey :: (Stream s m Char, HasLastStrPosition st)
+ => ParserT s st m (Bool, String)
+citeKey = try $ do
+ guard =<< notAfterString
+ suppress_author <- option False (char '-' *> return True)
+ char '@'
+ firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
+ let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+ let internal p = try $ p <* lookAhead regchar
+ rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
+ try (oneOf ":/" <* lookAhead (char '/'))
+ let key = firstChar:rest
+ return (suppress_author, key)
+
+
+token :: (Stream s m t)
+ => (t -> String)
+ -> (t -> SourcePos)
+ -> (t -> Maybe a)
+ -> ParsecT s st m a
+token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
+
+--
+-- Macros
+--
+
+-- | Parse a \newcommand or \renewcommand macro definition.
+macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
+ => ParserT [Char] st m Blocks
+macro = do
+ apply <- getOption readerApplyMacros
+ inp <- getInput
+ case parseMacroDefinitions inp of
+ ([], _) -> mzero
+ (ms, rest) -> do def' <- count (length inp - length rest) anyChar
+ if apply
+ then do
+ updateState $ \st ->
+ updateMacros (ms ++) st
+ return mempty
+ else return $ rawBlock "latex" def'
+
+-- | Apply current macros to string.
+applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
+ => String
+ -> ParserT [Char] st m String
+applyMacros' target = do
+ apply <- getOption readerApplyMacros
+ if apply
+ then do macros <- extractMacros <$> getState
+ return $ applyMacros macros target
+ else return target
+
+infixr 5 <+?>
+(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
+
+extractIdClass :: Attr -> Attr
+extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
+ where
+ ident' = case (lookup "id" kvs) of
+ Just v -> v
+ Nothing -> ident
+ cls' = case (lookup "class" kvs) of
+ Just cl -> words cl
+ Nothing -> cls
+ kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs
+
+insertIncludedFile :: PandocMonad m
+ => ParserT String ParserState m Blocks
+ -> [FilePath] -> FilePath
+ -> ParserT String ParserState m Blocks
+insertIncludedFile blocks dirs f = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ containers <- stateContainers <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ updateState $ \s -> s{ stateContainers = f : stateContainers s }
+ mbcontents <- readFileFromDirs dirs f
+ contents <- case mbcontents of
+ Just s -> return s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f oldPos
+ return ""
+ setPosition $ newPos f 1 1
+ setInput contents
+ bs <- blocks
+ setInput oldInput
+ setPosition oldPos
+ updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
+ return bs
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
new file mode 100644
index 000000000..256f38b0c
--- /dev/null
+++ b/src/Text/Pandoc/Pretty.hs
@@ -0,0 +1,557 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-}
+{-
+Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Pretty
+ Copyright : Copyright (C) 2010-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A prettyprinting library for the production of text documents,
+including wrapped text, indentated blocks, and tables.
+-}
+
+module Text.Pandoc.Pretty (
+ Doc
+ , render
+ , cr
+ , blankline
+ , blanklines
+ , space
+ , text
+ , char
+ , prefixed
+ , flush
+ , nest
+ , hang
+ , beforeNonBlank
+ , nowrap
+ , afterBreak
+ , offset
+ , minOffset
+ , height
+ , lblock
+ , cblock
+ , rblock
+ , (<>)
+ , (<+>)
+ , ($$)
+ , ($+$)
+ , isEmpty
+ , empty
+ , cat
+ , hcat
+ , hsep
+ , vcat
+ , vsep
+ , nestle
+ , chomp
+ , inside
+ , braces
+ , brackets
+ , parens
+ , quotes
+ , doubleQuotes
+ , charWidth
+ , realLength
+ )
+
+where
+import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..))
+import qualified Data.Sequence as Seq
+import Data.Foldable (toList)
+import Data.List (intersperse)
+import Data.String
+import Control.Monad.State
+import Data.Char (isSpace)
+import Data.Monoid ((<>))
+
+data RenderState a = RenderState{
+ output :: [a] -- ^ In reverse order
+ , prefix :: String
+ , usePrefix :: Bool
+ , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping
+ , column :: Int
+ , newlines :: Int -- ^ Number of preceding newlines
+ }
+
+type DocState a = State (RenderState a) ()
+
+data D = Text Int String
+ | Block Int [String]
+ | Prefixed String Doc
+ | BeforeNonBlank Doc
+ | Flush Doc
+ | BreakingSpace
+ | AfterBreak String
+ | CarriageReturn
+ | NewLine
+ | BlankLines Int -- number of blank lines
+ deriving (Show)
+
+newtype Doc = Doc { unDoc :: Seq D }
+ deriving (Monoid, Show)
+
+instance IsString Doc where
+ fromString = text
+
+isBlank :: D -> Bool
+isBlank BreakingSpace = True
+isBlank CarriageReturn = True
+isBlank NewLine = True
+isBlank (BlankLines _) = True
+isBlank (Text _ (c:_)) = isSpace c
+isBlank _ = False
+
+-- | True if the document is empty.
+isEmpty :: Doc -> Bool
+isEmpty = Seq.null . unDoc
+
+-- | The empty document.
+empty :: Doc
+empty = mempty
+
+-- | Concatenate a list of 'Doc's.
+cat :: [Doc] -> Doc
+cat = mconcat
+
+-- | Same as 'cat'.
+hcat :: [Doc] -> Doc
+hcat = mconcat
+
+-- | Concatenate a list of 'Doc's, putting breakable spaces
+-- between them.
+infixr 6 <+>
+(<+>) :: Doc -> Doc -> Doc
+(<+>) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> space <> y
+
+-- | Same as 'cat', but putting breakable spaces between the
+-- 'Doc's.
+hsep :: [Doc] -> Doc
+hsep = foldr (<+>) empty
+
+infixr 5 $$
+-- | @a $$ b@ puts @a@ above @b@.
+($$) :: Doc -> Doc -> Doc
+($$) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> cr <> y
+
+infixr 5 $+$
+-- | @a $+$ b@ puts @a@ above @b@, with a blank line between.
+($+$) :: Doc -> Doc -> Doc
+($+$) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> blankline <> y
+
+-- | List version of '$$'.
+vcat :: [Doc] -> Doc
+vcat = foldr ($$) empty
+
+-- | List version of '$+$'.
+vsep :: [Doc] -> Doc
+vsep = foldr ($+$) empty
+
+-- | Removes leading blank lines from a 'Doc'.
+nestle :: Doc -> Doc
+nestle (Doc d) = Doc $ go d
+ where go x = case viewl x of
+ (BlankLines _ :< rest) -> go rest
+ (NewLine :< rest) -> go rest
+ _ -> x
+
+-- | Chomps trailing blank space off of a 'Doc'.
+chomp :: Doc -> Doc
+chomp d = Doc (fromList dl')
+ where dl = toList (unDoc d)
+ dl' = reverse $ go $ reverse dl
+ go [] = []
+ go (BreakingSpace : xs) = go xs
+ go (CarriageReturn : xs) = go xs
+ go (NewLine : xs) = go xs
+ go (BlankLines _ : xs) = go xs
+ go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
+ go xs = xs
+
+outp :: (IsString a) => Int -> String -> DocState a
+outp off s | off < 0 = do -- offset < 0 means newline characters
+ st' <- get
+ let rawpref = prefix st'
+ when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
+ let pref = reverse $ dropWhile isSpace $ reverse rawpref
+ modify $ \st -> st{ output = fromString pref : output st
+ , column = column st + realLength pref }
+ let numnewlines = length $ takeWhile (=='\n') $ reverse s
+ modify $ \st -> st { output = fromString s : output st
+ , column = 0
+ , newlines = newlines st + numnewlines }
+outp off s = do -- offset >= 0 (0 might be combining char)
+ st' <- get
+ let pref = prefix st'
+ when (column st' == 0 && usePrefix st' && not (null pref)) $ do
+ modify $ \st -> st{ output = fromString pref : output st
+ , column = column st + realLength pref }
+ modify $ \st -> st{ output = fromString s : output st
+ , column = column st + off
+ , newlines = 0 }
+
+-- | Renders a 'Doc'. @render (Just n)@ will use
+-- a line length of @n@ to reflow text on breakable spaces.
+-- @render Nothing@ will not reflow text.
+render :: (IsString a) => Maybe Int -> Doc -> a
+render linelen doc = fromString . mconcat . reverse . output $
+ execState (renderDoc doc) startingState
+ where startingState = RenderState{
+ output = mempty
+ , prefix = ""
+ , usePrefix = True
+ , lineLength = linelen
+ , column = 0
+ , newlines = 2 }
+
+renderDoc :: (IsString a, Monoid a)
+ => Doc -> DocState a
+renderDoc = renderList . toList . unDoc
+
+data IsBlock = IsBlock Int [String]
+
+-- This would be nicer with a pattern synonym
+-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..)
+
+renderList :: (IsString a, Monoid a)
+ => [D] -> DocState a
+renderList [] = return ()
+renderList (Text off s : xs) = do
+ outp off s
+ renderList xs
+
+renderList (Prefixed pref d : xs) = do
+ st <- get
+ let oldPref = prefix st
+ put st{ prefix = prefix st ++ pref }
+ renderDoc d
+ modify $ \s -> s{ prefix = oldPref }
+ renderList xs
+
+renderList (Flush d : xs) = do
+ st <- get
+ let oldUsePrefix = usePrefix st
+ put st{ usePrefix = False }
+ renderDoc d
+ modify $ \s -> s{ usePrefix = oldUsePrefix }
+ renderList xs
+
+renderList (BeforeNonBlank d : xs) =
+ case xs of
+ (x:_) | isBlank x -> renderList xs
+ | otherwise -> renderDoc d >> renderList xs
+ [] -> renderList xs
+
+renderList [BlankLines _] = return ()
+
+renderList (BlankLines m : BlankLines n : xs) =
+ renderList (BlankLines (max m n) : xs)
+
+renderList (BlankLines num : xs) = do
+ st <- get
+ case output st of
+ _ | newlines st > num -> return ()
+ | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
+ renderList xs
+
+renderList (CarriageReturn : BlankLines m : xs) =
+ renderList (BlankLines m : xs)
+
+renderList (CarriageReturn : xs) = do
+ st <- get
+ if newlines st > 0 || null xs
+ then renderList xs
+ else do
+ outp (-1) "\n"
+ renderList xs
+
+renderList (NewLine : xs) = do
+ outp (-1) "\n"
+ renderList xs
+
+renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs)
+renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
+renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs)
+renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
+renderList (BreakingSpace : xs) = do
+ let isText (Text _ _) = True
+ isText (Block _ _) = True
+ isText (AfterBreak _) = True
+ isText _ = False
+ let isBreakingSpace BreakingSpace = True
+ isBreakingSpace _ = False
+ let xs' = dropWhile isBreakingSpace xs
+ let next = takeWhile isText xs'
+ st <- get
+ let off = sum $ map offsetOf next
+ case lineLength st of
+ Just l | column st + 1 + off > l -> do
+ outp (-1) "\n"
+ renderList xs'
+ _ -> do
+ outp 1 " "
+ renderList xs'
+
+renderList (AfterBreak s : xs) = do
+ st <- get
+ if newlines st > 0
+ then outp (realLength s) s
+ else return ()
+ renderList xs
+
+renderList (Block i1 s1 : Block i2 s2 : xs) =
+ renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs)
+
+renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) =
+ renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs)
+
+renderList (Block _width lns : xs) = do
+ st <- get
+ let oldPref = prefix st
+ case column st - realLength oldPref of
+ n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
+ _ -> return ()
+ renderList $ intersperse CarriageReturn (map (Text 0) lns)
+ modify $ \s -> s{ prefix = oldPref }
+ renderList xs
+
+mergeBlocks :: Bool -> IsBlock -> IsBlock -> D
+mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) =
+ Block (w1 + w2 + if addSpace then 1 else 0) $
+ zipWith (\l1 l2 -> pad w1 l1 ++ l2) lns1' (map sp lns2')
+ where (lns1', lns2') = case (length lns1, length lns2) of
+ (x, y) | x > y -> (lns1,
+ lns2 ++ replicate (x - y) "")
+ | x < y -> (lns1 ++ replicate (y - x) "",
+ lns2)
+ | otherwise -> (lns1, lns2)
+ pad n s = s ++ replicate (n - realLength s) ' '
+ sp "" = ""
+ sp xs = if addSpace then (' ' : xs) else xs
+
+offsetOf :: D -> Int
+offsetOf (Text o _) = o
+offsetOf (Block w _) = w
+offsetOf BreakingSpace = 1
+offsetOf _ = 0
+
+-- | A literal string.
+text :: String -> Doc
+text = Doc . toChunks
+ where toChunks :: String -> Seq D
+ toChunks [] = mempty
+ toChunks s = case break (=='\n') s of
+ ([], _:ys) -> NewLine <| toChunks ys
+ (xs, _:ys) -> Text (realLength xs) xs <|
+ (NewLine <| toChunks ys)
+ (xs, []) -> singleton $ Text (realLength xs) xs
+
+-- | A character.
+char :: Char -> Doc
+char c = text [c]
+
+-- | A breaking (reflowable) space.
+space :: Doc
+space = Doc $ singleton BreakingSpace
+
+-- | A carriage return. Does nothing if we're at the beginning of
+-- a line; otherwise inserts a newline.
+cr :: Doc
+cr = Doc $ singleton CarriageReturn
+
+-- | Inserts a blank line unless one exists already.
+-- (@blankline <> blankline@ has the same effect as @blankline@.
+blankline :: Doc
+blankline = Doc $ singleton (BlankLines 1)
+
+-- | Inserts a blank lines unless they exists already.
+-- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@.
+blanklines :: Int -> Doc
+blanklines n = Doc $ singleton (BlankLines n)
+
+-- | Uses the specified string as a prefix for every line of
+-- the inside document (except the first, if not at the beginning
+-- of the line).
+prefixed :: String -> Doc -> Doc
+prefixed pref doc = Doc $ singleton $ Prefixed pref doc
+
+-- | Makes a 'Doc' flush against the left margin.
+flush :: Doc -> Doc
+flush doc = Doc $ singleton $ Flush doc
+
+-- | Indents a 'Doc' by the specified number of spaces.
+nest :: Int -> Doc -> Doc
+nest ind = prefixed (replicate ind ' ')
+
+-- | A hanging indent. @hang ind start doc@ prints @start@,
+-- then @doc@, leaving an indent of @ind@ spaces on every
+-- line but the first.
+hang :: Int -> Doc -> Doc -> Doc
+hang ind start doc = start <> nest ind doc
+
+-- | @beforeNonBlank d@ conditionally includes @d@ unless it is
+-- followed by blank space.
+beforeNonBlank :: Doc -> Doc
+beforeNonBlank d = Doc $ singleton (BeforeNonBlank d)
+
+-- | Makes a 'Doc' non-reflowable.
+nowrap :: Doc -> Doc
+nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc
+ where replaceSpace _ BreakingSpace = Text 1 " "
+ replaceSpace _ x = x
+
+-- | Content to print only if it comes at the beginning of a line,
+-- to be used e.g. for escaping line-initial `.` in groff man.
+afterBreak :: String -> Doc
+afterBreak s = Doc $ singleton (AfterBreak s)
+
+-- | Returns the width of a 'Doc'.
+offset :: Doc -> Int
+offset d = case map realLength . lines . render Nothing $ d of
+ [] -> 0
+ os -> maximum os
+
+-- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces.
+minOffset :: Doc -> Int
+minOffset d = maximum (0: map realLength (lines $ render (Just 0) d))
+
+-- | @lblock n d@ is a block of width @n@ characters, with
+-- text derived from @d@ and aligned to the left.
+lblock :: Int -> Doc -> Doc
+lblock = block id
+
+-- | Like 'lblock' but aligned to the right.
+rblock :: Int -> Doc -> Doc
+rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w
+
+-- | Like 'lblock' but aligned centered.
+cblock :: Int -> Doc -> Doc
+cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w
+
+-- | Returns the height of a block or other 'Doc'.
+height :: Doc -> Int
+height = length . lines . render Nothing
+
+block :: (String -> String) -> Int -> Doc -> Doc
+block filler width d
+ | width < 1 && not (isEmpty d) = error "Text.Pandoc.Pretty.block: width < 1"
+ | otherwise = Doc $ singleton $ Block width $ map filler
+ $ chop width $ render (Just width) d
+
+chop :: Int -> String -> [String]
+chop _ [] = []
+chop n cs = case break (=='\n') cs of
+ (xs, ys) -> if len <= n
+ then case ys of
+ [] -> [xs]
+ ['\n'] -> [xs]
+ (_:zs) -> xs : chop n zs
+ else take n xs : chop n (drop n xs ++ ys)
+ where len = realLength xs
+
+-- | Encloses a 'Doc' inside a start and end 'Doc'.
+inside :: Doc -> Doc -> Doc -> Doc
+inside start end contents =
+ start <> contents <> end
+
+-- | Puts a 'Doc' in curly braces.
+braces :: Doc -> Doc
+braces = inside (char '{') (char '}')
+
+-- | Puts a 'Doc' in square brackets.
+brackets :: Doc -> Doc
+brackets = inside (char '[') (char ']')
+
+-- | Puts a 'Doc' in parentheses.
+parens :: Doc -> Doc
+parens = inside (char '(') (char ')')
+
+-- | Wraps a 'Doc' in single quotes.
+quotes :: Doc -> Doc
+quotes = inside (char '\'') (char '\'')
+
+-- | Wraps a 'Doc' in double quotes.
+doubleQuotes :: Doc -> Doc
+doubleQuotes = inside (char '"') (char '"')
+
+-- | Returns width of a character in a monospace font: 0 for a combining
+-- character, 1 for a regular character, 2 for an East Asian wide character.
+charWidth :: Char -> Int
+charWidth c =
+ case c of
+ _ | c < '\x0300' -> 1
+ | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
+ | c >= '\x0370' && c <= '\x10FC' -> 1
+ | c >= '\x1100' && c <= '\x115F' -> 2
+ | c >= '\x1160' && c <= '\x11A2' -> 1
+ | c >= '\x11A3' && c <= '\x11A7' -> 2
+ | c >= '\x11A8' && c <= '\x11F9' -> 1
+ | c >= '\x11FA' && c <= '\x11FF' -> 2
+ | c >= '\x1200' && c <= '\x2328' -> 1
+ | c >= '\x2329' && c <= '\x232A' -> 2
+ | c >= '\x232B' && c <= '\x2E31' -> 1
+ | c >= '\x2E80' && c <= '\x303E' -> 2
+ | c == '\x303F' -> 1
+ | c >= '\x3041' && c <= '\x3247' -> 2
+ | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
+ | c >= '\x3250' && c <= '\x4DBF' -> 2
+ | c >= '\x4DC0' && c <= '\x4DFF' -> 1
+ | c >= '\x4E00' && c <= '\xA4C6' -> 2
+ | c >= '\xA4D0' && c <= '\xA95F' -> 1
+ | c >= '\xA960' && c <= '\xA97C' -> 2
+ | c >= '\xA980' && c <= '\xABF9' -> 1
+ | c >= '\xAC00' && c <= '\xD7FB' -> 2
+ | c >= '\xD800' && c <= '\xDFFF' -> 1
+ | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
+ | c >= '\xF900' && c <= '\xFAFF' -> 2
+ | c >= '\xFB00' && c <= '\xFDFD' -> 1
+ | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
+ | c >= '\xFE10' && c <= '\xFE19' -> 2
+ | c >= '\xFE20' && c <= '\xFE26' -> 1
+ | c >= '\xFE30' && c <= '\xFE6B' -> 2
+ | c >= '\xFE70' && c <= '\xFEFF' -> 1
+ | c >= '\xFF01' && c <= '\xFF60' -> 2
+ | c >= '\xFF61' && c <= '\x16A38' -> 1
+ | c >= '\x1B000' && c <= '\x1B001' -> 2
+ | c >= '\x1D000' && c <= '\x1F1FF' -> 1
+ | c >= '\x1F200' && c <= '\x1F251' -> 2
+ | c >= '\x1F300' && c <= '\x1F773' -> 1
+ | c >= '\x20000' && c <= '\x3FFFD' -> 2
+ | otherwise -> 1
+
+-- | Get real length of string, taking into account combining and double-wide
+-- characters.
+realLength :: String -> Int
+realLength = foldr (\a b -> charWidth a + b) 0
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
new file mode 100644
index 000000000..294a38a1b
--- /dev/null
+++ b/src/Text/Pandoc/Process.hs
@@ -0,0 +1,98 @@
+{-
+Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Process
+ Copyright : Copyright (C) 2013-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+ByteString variant of 'readProcessWithExitCode'.
+-}
+module Text.Pandoc.Process (pipeProcess)
+where
+import System.Process
+import System.Exit (ExitCode (..))
+import Control.Exception
+import System.IO (hClose, hFlush)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Control.Monad (unless)
+import qualified Data.ByteString.Lazy as BL
+
+{- |
+Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings
+instead of strings and allows setting environment variables.
+
+@readProcessWithExitCode@ creates an external process, reads its
+standard output strictly, waits until the process
+terminates, and then returns the 'ExitCode' of the process
+and the standard output. stderr is inherited from the parent.
+
+If an asynchronous exception is thrown to the thread executing
+@readProcessWithExitCode@, the forked process will be terminated and
+@readProcessWithExitCode@ will wait (block) until the process has been
+terminated.
+-}
+
+pipeProcess
+ :: Maybe [(String, String)] -- ^ environment variables
+ -> FilePath -- ^ Filename of the executable (see 'proc' for details)
+ -> [String] -- ^ any arguments
+ -> BL.ByteString -- ^ standard input
+ -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout
+pipeProcess mbenv cmd args input =
+ mask $ \restore -> do
+ (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args)
+ { env = mbenv,
+ std_in = CreatePipe,
+ std_out = CreatePipe,
+ std_err = Inherit }
+ flip onException
+ (do hClose inh; hClose outh;
+ terminateProcess pid; waitForProcess pid) $ restore $ do
+ -- fork off a thread to start consuming stdout
+ out <- BL.hGetContents outh
+ waitOut <- forkWait $ evaluate $ BL.length out
+
+ -- now write and flush any input
+ let writeInput = do
+ unless (BL.null input) $ do
+ BL.hPutStr inh input
+ hFlush inh
+ hClose inh
+
+ writeInput
+
+ -- wait on the output
+ waitOut
+
+ hClose outh
+
+ -- wait on the process
+ ex <- waitForProcess pid
+
+ return (ex, out)
+
+forkWait :: IO a -> IO (IO a)
+forkWait a = do
+ res <- newEmptyMVar
+ _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res
+ return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return)
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
new file mode 100644
index 000000000..b0bcbd580
--- /dev/null
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -0,0 +1,128 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of CommonMark-formatted plain text to 'Pandoc' document.
+
+CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
+-}
+module Text.Pandoc.Readers.CommonMark (readCommonMark)
+where
+
+import CMark
+import Data.Text (unpack, pack)
+import Data.List (groupBy)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Class (PandocMonad)
+
+-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
+readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readCommonMark opts s = return $
+ nodeToPandoc $ commonmarkToNode opts' $ pack s
+ where opts' = if extensionEnabled Ext_smart (readerExtensions opts)
+ then [optNormalize, optSmart]
+ else [optNormalize]
+
+nodeToPandoc :: Node -> Pandoc
+nodeToPandoc (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr addBlock [] nodes
+nodeToPandoc n = -- shouldn't happen
+ Pandoc nullMeta $ foldr addBlock [] [n]
+
+addBlocks :: [Node] -> [Block]
+addBlocks = foldr addBlock []
+
+addBlock :: Node -> [Block] -> [Block]
+addBlock (Node _ PARAGRAPH nodes) =
+ (Para (addInlines nodes) :)
+addBlock (Node _ THEMATIC_BREAK _) =
+ (HorizontalRule :)
+addBlock (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks nodes) :)
+addBlock (Node _ (HTML_BLOCK t) _) =
+ (RawBlock (Format "html") (unpack t) :)
+-- Note: the cmark parser will never generate CUSTOM_BLOCK,
+-- so we don't need to handle it:
+addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
+ id
+addBlock (Node _ (CODE_BLOCK info t) _) =
+ (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+addBlock (Node _ (HEADING lev) nodes) =
+ (Header lev ("",[],[]) (addInlines nodes) :)
+addBlock (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks . children) nodes) :)
+ where constructor = case listType listAttrs of
+ BULLET_LIST -> BulletList
+ ORDERED_LIST -> OrderedList
+ (start, DefaultStyle, delim)
+ start = listStart listAttrs
+ setTightness = if listTight listAttrs
+ then map paraToPlain
+ else id
+ paraToPlain (Para xs) = Plain (xs)
+ paraToPlain x = x
+ delim = case listDelim listAttrs of
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock (Node _ ITEM _) = id -- handled in LIST
+addBlock _ = id
+
+children :: Node -> [Node]
+children (Node _ _ ns) = ns
+
+addInlines :: [Node] -> [Inline]
+addInlines = foldr addInline []
+
+addInline :: Node -> [Inline] -> [Inline]
+addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+ where raw = unpack t
+ clumps = groupBy samekind raw
+ samekind ' ' ' ' = True
+ samekind ' ' _ = False
+ samekind _ ' ' = False
+ samekind _ _ = True
+ toinl (' ':_) = Space
+ toinl xs = Str xs
+addInline (Node _ LINEBREAK _) = (LineBreak :)
+addInline (Node _ SOFTBREAK _) = (SoftBreak :)
+addInline (Node _ (HTML_INLINE t) _) =
+ (RawInline (Format "html") (unpack t) :)
+-- Note: the cmark parser will never generate CUSTOM_BLOCK,
+-- so we don't need to handle it:
+addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
+ id
+addInline (Node _ (CODE t) _) =
+ (Code ("",[],[]) (unpack t) :)
+addInline (Node _ EMPH nodes) =
+ (Emph (addInlines nodes) :)
+addInline (Node _ STRONG nodes) =
+ (Strong (addInlines nodes) :)
+addInline (Node _ (LINK url title) nodes) =
+ (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
+addInline (Node _ (IMAGE url title) nodes) =
+ (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
+addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
new file mode 100644
index 000000000..bef256a93
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -0,0 +1,1055 @@
+module Text.Pandoc.Readers.DocBook ( readDocBook ) where
+import Data.Char (toUpper)
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.XML.Light
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Either (rights)
+import Data.Generics
+import Data.Char (isSpace)
+import Control.Monad.State
+import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
+import Text.TeXMath (readMathML, writeTeX)
+import Data.Default
+import Data.Foldable (asum)
+import Text.Pandoc.Class (PandocMonad)
+
+{-
+
+List of all DocBook tags, with [x] indicating implemented,
+[o] meaning intentionally left unimplemented (pass through):
+
+[o] abbrev - An abbreviation, especially one followed by a period
+[x] abstract - A summary
+[o] accel - A graphical user interface (GUI) keyboard shortcut
+[x] ackno - Acknowledgements in an Article
+[o] acronym - An often pronounceable word made from the initial
+[o] action - A response to a user event
+[o] address - A real-world address, generally a postal address
+[ ] affiliation - The institutional affiliation of an individual
+[ ] alt - Text representation for a graphical element
+[o] anchor - A spot in the document
+[x] answer - An answer to a question posed in a QandASet
+[x] appendix - An appendix in a Book or Article
+[x] appendixinfo - Meta-information for an Appendix
+[o] application - The name of a software program
+[x] area - A region defined for a Callout in a graphic or code example
+[x] areaset - A set of related areas in a graphic or code example
+[x] areaspec - A collection of regions in a graphic or code example
+[ ] arg - An argument in a CmdSynopsis
+[x] article - An article
+[x] articleinfo - Meta-information for an Article
+[ ] artpagenums - The page numbers of an article as published
+[x] attribution - The source of a block quote or epigraph
+[ ] audiodata - Pointer to external audio data
+[ ] audioobject - A wrapper for audio data and its associated meta-information
+[x] author - The name of an individual author
+[ ] authorblurb - A short description or note about an author
+[x] authorgroup - Wrapper for author information when a document has
+ multiple authors or collabarators
+[x] authorinitials - The initials or other short identifier for an author
+[o] beginpage - The location of a page break in a print version of the document
+[ ] bibliocoverage - The spatial or temporal coverage of a document
+[x] bibliodiv - A section of a Bibliography
+[x] biblioentry - An entry in a Bibliography
+[x] bibliography - A bibliography
+[ ] bibliographyinfo - Meta-information for a Bibliography
+[ ] biblioid - An identifier for a document
+[o] bibliolist - A wrapper for a set of bibliography entries
+[ ] bibliomisc - Untyped bibliographic information
+[x] bibliomixed - An entry in a Bibliography
+[ ] bibliomset - A cooked container for related bibliographic information
+[ ] biblioref - A cross reference to a bibliographic entry
+[ ] bibliorelation - The relationship of a document to another
+[ ] biblioset - A raw container for related bibliographic information
+[ ] bibliosource - The source of a document
+[ ] blockinfo - Meta-information for a block element
+[x] blockquote - A quotation set off from the main text
+[x] book - A book
+[x] bookinfo - Meta-information for a Book
+[x] bridgehead - A free-floating heading
+[x] callout - A “called out” description of a marked Area
+[x] calloutlist - A list of Callouts
+[x] caption - A caption
+[x] caution - A note of caution
+[x] chapter - A chapter, as of a book
+[x] chapterinfo - Meta-information for a Chapter
+[ ] citation - An inline bibliographic reference to another published work
+[ ] citebiblioid - A citation of a bibliographic identifier
+[ ] citerefentry - A citation to a reference page
+[ ] citetitle - The title of a cited work
+[ ] city - The name of a city in an address
+[x] classname - The name of a class, in the object-oriented programming sense
+[ ] classsynopsis - The syntax summary for a class definition
+[ ] classsynopsisinfo - Information supplementing the contents of
+ a ClassSynopsis
+[ ] cmdsynopsis - A syntax summary for a software command
+[ ] co - The location of a callout embedded in text
+[x] code - An inline code fragment
+[x] col - Specifications for a column in an HTML table
+[x] colgroup - A group of columns in an HTML table
+[ ] collab - Identifies a collaborator
+[ ] collabname - The name of a collaborator
+[ ] colophon - Text at the back of a book describing facts about its production
+[x] colspec - Specifications for a column in a table
+[x] command - The name of an executable program or other software command
+[x] computeroutput - Data, generally text, displayed or presented by a computer
+[ ] confdates - The dates of a conference for which a document was written
+[ ] confgroup - A wrapper for document meta-information about a conference
+[ ] confnum - An identifier, frequently numerical, associated with a conference for which a document was written
+[ ] confsponsor - The sponsor of a conference for which a document was written
+[ ] conftitle - The title of a conference for which a document was written
+[x] constant - A programming or system constant
+[ ] constraint - A constraint in an EBNF production
+[ ] constraintdef - The definition of a constraint in an EBNF production
+[ ] constructorsynopsis - A syntax summary for a constructor
+[ ] contractnum - The contract number of a document
+[ ] contractsponsor - The sponsor of a contract
+[ ] contrib - A summary of the contributions made to a document by a
+ credited source
+[ ] copyright - Copyright information about a document
+[ ] coref - A cross reference to a co
+[ ] corpauthor - A corporate author, as opposed to an individual
+[ ] corpcredit - A corporation or organization credited in a document
+[ ] corpname - The name of a corporation
+[ ] country - The name of a country
+[ ] database - The name of a database, or part of a database
+[x] date - The date of publication or revision of a document
+[ ] dedication - A wrapper for the dedication section of a book
+[ ] destructorsynopsis - A syntax summary for a destructor
+[ ] edition - The name or number of an edition of a document
+[ ] editor - The name of the editor of a document
+[x] email - An email address
+[x] emphasis - Emphasized text
+[x] entry - A cell in a table
+[ ] entrytbl - A subtable appearing in place of an Entry in a table
+[ ] envar - A software environment variable
+[x] epigraph - A short inscription at the beginning of a document or component
+ note: also handle embedded attribution tag
+[x] equation - A displayed mathematical equation
+[ ] errorcode - An error code
+[ ] errorname - An error name
+[ ] errortext - An error message.
+[ ] errortype - The classification of an error message
+[ ] example - A formal example, with a title
+[ ] exceptionname - The name of an exception
+[ ] fax - A fax number
+[ ] fieldsynopsis - The name of a field in a class definition
+[x] figure - A formal figure, generally an illustration, with a title
+[x] filename - The name of a file
+[ ] firstname - The first name of a person
+[ ] firstterm - The first occurrence of a term
+[x] footnote - A footnote
+[ ] footnoteref - A cross reference to a footnote (a footnote mark)
+[x] foreignphrase - A word or phrase in a language other than the primary
+ language of the document
+[x] formalpara - A paragraph with a title
+[ ] funcdef - A function (subroutine) name and its return type
+[ ] funcparams - Parameters for a function referenced through a function
+ pointer in a synopsis
+[ ] funcprototype - The prototype of a function
+[ ] funcsynopsis - The syntax summary for a function definition
+[ ] funcsynopsisinfo - Information supplementing the FuncDefs of a FuncSynopsis
+[x] function - The name of a function or subroutine, as in a
+ programming language
+[x] glossary - A glossary
+[x] glossaryinfo - Meta-information for a Glossary
+[x] glossdef - A definition in a GlossEntry
+[x] glossdiv - A division in a Glossary
+[x] glossentry - An entry in a Glossary or GlossList
+[x] glosslist - A wrapper for a set of GlossEntrys
+[x] glosssee - A cross-reference from one GlossEntry to another
+[x] glossseealso - A cross-reference from one GlossEntry to another
+[x] glossterm - A glossary term
+[ ] graphic - A displayed graphical object (not an inline)
+ Note: in DocBook v5 `graphic` is discarded
+[ ] graphicco - A graphic that contains callout areas
+ Note: in DocBook v5 `graphicco` is discarded
+[ ] group - A group of elements in a CmdSynopsis
+[ ] guibutton - The text on a button in a GUI
+[ ] guiicon - Graphic and/or text appearing as a icon in a GUI
+[ ] guilabel - The text of a label in a GUI
+[x] guimenu - The name of a menu in a GUI
+[x] guimenuitem - The name of a terminal menu item in a GUI
+[x] guisubmenu - The name of a submenu in a GUI
+[ ] hardware - A physical part of a computer system
+[ ] highlights - A summary of the main points of the discussed component
+[ ] holder - The name of the individual or organization that holds a copyright
+[o] honorific - The title of a person
+[ ] html:form - An HTML form
+[x] imagedata - Pointer to external image data (only `fileref` attribute
+ implemented but not `entityref` which would require parsing of the DTD)
+[x] imageobject - A wrapper for image data and its associated meta-information
+[ ] imageobjectco - A wrapper for an image object with callouts
+[x] important - An admonition set off from the text
+[x] index - An index
+[x] indexdiv - A division in an index
+[x] indexentry - An entry in an index
+[x] indexinfo - Meta-information for an Index
+[x] indexterm - A wrapper for terms to be indexed
+[x] info - A wrapper for information about a component or other block. (DocBook v5)
+[x] informalequation - A displayed mathematical equation without a title
+[x] informalexample - A displayed example without a title
+[ ] informalfigure - A untitled figure
+[ ] informaltable - A table without a title
+[ ] initializer - The initializer for a FieldSynopsis
+[x] inlineequation - A mathematical equation or expression occurring inline
+[ ] inlinegraphic - An object containing or pointing to graphical data
+ that will be rendered inline
+[x] inlinemediaobject - An inline media object (video, audio, image, and so on)
+[ ] interface - An element of a GUI
+[ ] interfacename - The name of an interface
+[ ] invpartnumber - An inventory part number
+[ ] isbn - The International Standard Book Number of a document
+[ ] issn - The International Standard Serial Number of a periodical
+[ ] issuenum - The number of an issue of a journal
+[x] itemizedlist - A list in which each entry is marked with a bullet or
+ other dingbat
+[ ] itermset - A set of index terms in the meta-information of a document
+[ ] jobtitle - The title of an individual in an organization
+[x] keycap - The text printed on a key on a keyboard
+[ ] keycode - The internal, frequently numeric, identifier for a key
+ on a keyboard
+[x] keycombo - A combination of input actions
+[ ] keysym - The symbolic name of a key on a keyboard
+[ ] keyword - One of a set of keywords describing the content of a document
+[ ] keywordset - A set of keywords describing the content of a document
+[ ] label - A label on a Question or Answer
+[ ] legalnotice - A statement of legal obligations or requirements
+[ ] lhs - The left-hand side of an EBNF production
+[ ] lineage - The portion of a person's name indicating a relationship to
+ ancestors
+[ ] lineannotation - A comment on a line in a verbatim listing
+[x] link - A hypertext link
+[x] listitem - A wrapper for the elements of a list item
+[x] literal - Inline text that is some literal value
+[x] literallayout - A block of text in which line breaks and white space are
+ to be reproduced faithfully
+[ ] lot - A list of the titles of formal objects (as tables or figures) in
+ a document
+[ ] lotentry - An entry in a list of titles
+[ ] manvolnum - A reference volume number
+[x] markup - A string of formatting markup in text that is to be
+ represented literally
+[ ] mathphrase - A mathematical phrase, an expression that can be represented
+ with ordinary text and a small amount of markup
+[ ] medialabel - A name that identifies the physical medium on which some
+ information resides
+[x] mediaobject - A displayed media object (video, audio, image, etc.)
+[ ] mediaobjectco - A media object that contains callouts
+[x] member - An element of a simple list
+[x] menuchoice - A selection or series of selections from a menu
+[ ] methodname - The name of a method
+[ ] methodparam - Parameters to a method
+[ ] methodsynopsis - A syntax summary for a method
+[x] mml:math - A MathML equation
+[ ] modespec - Application-specific information necessary for the
+ completion of an OLink
+[ ] modifier - Modifiers in a synopsis
+[ ] mousebutton - The conventional name of a mouse button
+[ ] msg - A message in a message set
+[ ] msgaud - The audience to which a message in a message set is relevant
+[ ] msgentry - A wrapper for an entry in a message set
+[ ] msgexplan - Explanatory material relating to a message in a message set
+[ ] msginfo - Information about a message in a message set
+[ ] msglevel - The level of importance or severity of a message in a message set
+[ ] msgmain - The primary component of a message in a message set
+[ ] msgorig - The origin of a message in a message set
+[ ] msgrel - A related component of a message in a message set
+[ ] msgset - A detailed set of messages, usually error messages
+[ ] msgsub - A subcomponent of a message in a message set
+[ ] msgtext - The actual text of a message component in a message set
+[ ] nonterminal - A non-terminal in an EBNF production
+[x] note - A message set off from the text
+[ ] objectinfo - Meta-information for an object
+[ ] olink - A link that addresses its target indirectly, through an entity
+[ ] ooclass - A class in an object-oriented programming language
+[ ] ooexception - An exception in an object-oriented programming language
+[ ] oointerface - An interface in an object-oriented programming language
+[x] option - An option for a software command
+[x] optional - Optional information
+[x] orderedlist - A list in which each entry is marked with a sequentially
+ incremented label
+[ ] orgdiv - A division of an organization
+[ ] orgname - The name of an organization other than a corporation
+[ ] otheraddr - Uncategorized information in address
+[ ] othercredit - A person or entity, other than an author or editor,
+ credited in a document
+[ ] othername - A component of a persons name that is not a first name,
+ surname, or lineage
+[ ] package - A package
+[ ] pagenums - The numbers of the pages in a book, for use in a bibliographic
+ entry
+[x] para - A paragraph
+[ ] paramdef - Information about a function parameter in a programming language
+[x] parameter - A value or a symbolic reference to a value
+[ ] part - A division in a book
+[ ] partinfo - Meta-information for a Part
+[ ] partintro - An introduction to the contents of a part
+[ ] personblurb - A short description or note about a person
+[ ] personname - The personal name of an individual
+[ ] phone - A telephone number
+[ ] phrase - A span of text
+[ ] pob - A post office box in an address
+[ ] postcode - A postal code in an address
+[x] preface - Introductory matter preceding the first chapter of a book
+[ ] prefaceinfo - Meta-information for a Preface
+[ ] primary - The primary word or phrase under which an index term should be
+ sorted
+[ ] primaryie - A primary term in an index entry, not in the text
+[ ] printhistory - The printing history of a document
+[ ] procedure - A list of operations to be performed in a well-defined sequence
+[ ] production - A production in a set of EBNF productions
+[ ] productionrecap - A cross-reference to an EBNF production
+[ ] productionset - A set of EBNF productions
+[ ] productname - The formal name of a product
+[ ] productnumber - A number assigned to a product
+[x] programlisting - A literal listing of all or part of a program
+[ ] programlistingco - A program listing with associated areas used in callouts
+[x] prompt - A character or string indicating the start of an input field in
+ a computer display
+[ ] property - A unit of data associated with some part of a computer system
+[ ] pubdate - The date of publication of a document
+[ ] publisher - The publisher of a document
+[ ] publishername - The name of the publisher of a document
+[ ] pubsnumber - A number assigned to a publication other than an ISBN or ISSN
+ or inventory part number
+[x] qandadiv - A titled division in a QandASet
+[o] qandaentry - A question/answer set within a QandASet
+[o] qandaset - A question-and-answer set
+[x] question - A question in a QandASet
+[x] quote - An inline quotation
+[ ] refclass - The scope or other indication of applicability of a
+ reference entry
+[ ] refdescriptor - A description of the topic of a reference page
+[ ] refentry - A reference page (originally a UNIX man-style reference page)
+[ ] refentryinfo - Meta-information for a Refentry
+[ ] refentrytitle - The title of a reference page
+[ ] reference - A collection of reference entries
+[ ] referenceinfo - Meta-information for a Reference
+[ ] refmeta - Meta-information for a reference entry
+[ ] refmiscinfo - Meta-information for a reference entry other than the title
+ and volume number
+[ ] refname - The name of (one of) the subject(s) of a reference page
+[ ] refnamediv - The name, purpose, and classification of a reference page
+[ ] refpurpose - A short (one sentence) synopsis of the topic of a reference
+ page
+[x] refsect1 - A major subsection of a reference entry
+[x] refsect1info - Meta-information for a RefSect1
+[x] refsect2 - A subsection of a RefSect1
+[x] refsect2info - Meta-information for a RefSect2
+[x] refsect3 - A subsection of a RefSect2
+[x] refsect3info - Meta-information for a RefSect3
+[x] refsection - A recursive section in a refentry
+[x] refsectioninfo - Meta-information for a refsection
+[ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page
+[ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv
+[x] releaseinfo - Information about a particular release of a document
+[ ] remark - A remark (or comment) intended for presentation in a draft
+ manuscript
+[ ] replaceable - Content that may or must be replaced by the user
+[ ] returnvalue - The value returned by a function
+[ ] revdescription - A extended description of a revision to a document
+[ ] revhistory - A history of the revisions to a document
+[ ] revision - An entry describing a single revision in the history of the
+ revisions to a document
+[ ] revnumber - A document revision number
+[ ] revremark - A description of a revision to a document
+[ ] rhs - The right-hand side of an EBNF production
+[x] row - A row in a table
+[ ] sbr - An explicit line break in a command synopsis
+[x] screen - Text that a user sees or might see on a computer screen
+[o] screenco - A screen with associated areas used in callouts
+[o] screeninfo - Information about how a screen shot was produced
+[ ] screenshot - A representation of what the user sees or might see on a
+ computer screen
+[ ] secondary - A secondary word or phrase in an index term
+[ ] secondaryie - A secondary term in an index entry, rather than in the text
+[x] sect1 - A top-level section of document
+[x] sect1info - Meta-information for a Sect1
+[x] sect2 - A subsection within a Sect1
+[x] sect2info - Meta-information for a Sect2
+[x] sect3 - A subsection within a Sect2
+[x] sect3info - Meta-information for a Sect3
+[x] sect4 - A subsection within a Sect3
+[x] sect4info - Meta-information for a Sect4
+[x] sect5 - A subsection within a Sect4
+[x] sect5info - Meta-information for a Sect5
+[x] section - A recursive section
+[x] sectioninfo - Meta-information for a recursive section
+[x] see - Part of an index term directing the reader instead to another entry
+ in the index
+[x] seealso - Part of an index term directing the reader also to another entry
+ in the index
+[ ] seealsoie - A See also entry in an index, rather than in the text
+[ ] seeie - A See entry in an index, rather than in the text
+[x] seg - An element of a list item in a segmented list
+[x] seglistitem - A list item in a segmented list
+[x] segmentedlist - A segmented list, a list of sets of elements
+[x] segtitle - The title of an element of a list item in a segmented list
+[ ] seriesvolnums - Numbers of the volumes in a series of books
+[ ] set - A collection of books
+[ ] setindex - An index to a set of books
+[ ] setindexinfo - Meta-information for a SetIndex
+[ ] setinfo - Meta-information for a Set
+[ ] sgmltag - A component of SGML markup
+[ ] shortaffil - A brief description of an affiliation
+[ ] shortcut - A key combination for an action that is also accessible through
+ a menu
+[ ] sidebar - A portion of a document that is isolated from the main
+ narrative flow
+[ ] sidebarinfo - Meta-information for a Sidebar
+[x] simpara - A paragraph that contains only text and inline markup, no block
+ elements
+[x] simplelist - An undecorated list of single words or short phrases
+[ ] simplemsgentry - A wrapper for a simpler entry in a message set
+[ ] simplesect - A section of a document with no subdivisions
+[ ] spanspec - Formatting information for a spanned column in a table
+[ ] state - A state or province in an address
+[ ] step - A unit of action in a procedure
+[ ] stepalternatives - Alternative steps in a procedure
+[ ] street - A street address in an address
+[ ] structfield - A field in a structure (in the programming language sense)
+[ ] structname - The name of a structure (in the programming language sense)
+[ ] subject - One of a group of terms describing the subject matter of a
+ document
+[ ] subjectset - A set of terms describing the subject matter of a document
+[ ] subjectterm - A term in a group of terms describing the subject matter of
+ a document
+[x] subscript - A subscript (as in H2O, the molecular formula for water)
+[ ] substeps - A wrapper for steps that occur within steps in a procedure
+[x] subtitle - The subtitle of a document
+[x] superscript - A superscript (as in x2, the mathematical notation for x
+ multiplied by itself)
+[ ] surname - A family name; in western cultures the last name
+[ ] svg:svg - An SVG graphic
+[x] symbol - A name that is replaced by a value before processing
+[ ] synopfragment - A portion of a CmdSynopsis broken out from the main body
+ of the synopsis
+[ ] synopfragmentref - A reference to a fragment of a command synopsis
+[ ] synopsis - A general-purpose element for representing the syntax of
+ commands or functions
+[ ] systemitem - A system-related item or term
+[ ] table - A formal table in a document
+[ ] task - A task to be completed
+[ ] taskprerequisites - The prerequisites for a task
+[ ] taskrelated - Information related to a task
+[ ] tasksummary - A summary of a task
+[x] tbody - A wrapper for the rows of a table or informal table
+[x] td - A table entry in an HTML table
+[x] term - The word or phrase being defined or described in a variable list
+[ ] termdef - An inline term definition
+[ ] tertiary - A tertiary word or phrase in an index term
+[ ] tertiaryie - A tertiary term in an index entry, rather than in the text
+[ ] textdata - Pointer to external text data
+[ ] textobject - A wrapper for a text description of an object and its
+ associated meta-information
+[ ] tfoot - A table footer consisting of one or more rows
+[x] tgroup - A wrapper for the main content of a table, or part of a table
+[x] th - A table header entry in an HTML table
+[x] thead - A table header consisting of one or more rows
+[x] tip - A suggestion to the user, set off from the text
+[x] title - The text of the title of a section of a document or of a formal
+ block-level element
+[x] titleabbrev - The abbreviation of a Title
+[x] toc - A table of contents
+[x] tocback - An entry in a table of contents for a back matter component
+[x] tocchap - An entry in a table of contents for a component in the body of
+ a document
+[x] tocentry - A component title in a table of contents
+[x] tocfront - An entry in a table of contents for a front matter component
+[x] toclevel1 - A top-level entry within a table of contents entry for a
+ chapter-like component
+[x] toclevel2 - A second-level entry within a table of contents entry for a
+ chapter-like component
+[x] toclevel3 - A third-level entry within a table of contents entry for a
+ chapter-like component
+[x] toclevel4 - A fourth-level entry within a table of contents entry for a
+ chapter-like component
+[x] toclevel5 - A fifth-level entry within a table of contents entry for a
+ chapter-like component
+[x] tocpart - An entry in a table of contents for a part of a book
+[ ] token - A unit of information
+[x] tr - A row in an HTML table
+[ ] trademark - A trademark
+[x] type - The classification of a value
+[x] ulink - A link that addresses its target by means of a URL
+ (Uniform Resource Locator)
+[x] uri - A Uniform Resource Identifier
+[x] userinput - Data entered by the user
+[x] varargs - An empty element in a function synopsis indicating a variable
+ number of arguments
+[x] variablelist - A list in which each entry is composed of a set of one or
+ more terms and an associated description
+[x] varlistentry - A wrapper for a set of terms and the associated description
+ in a variable list
+[x] varname - The name of a variable
+[ ] videodata - Pointer to external video data
+[ ] videoobject - A wrapper for video data and its associated meta-information
+[ ] void - An empty element in a function synopsis indicating that the
+ function in question takes no arguments
+[ ] volumenum - The volume number of a document in a set (as of books in a set
+ or articles in a journal)
+[x] warning - An admonition set off from the text
+[x] wordasword - A word meant specifically as a word and not representing
+ anything else
+[x] xref - A cross reference to another part of the document
+[ ] year - The year of publication of a document
+[x] ?asciidoc-br? - line break from asciidoc docbook output
+-}
+
+type DB m = StateT DBState m
+
+data DBState = DBState{ dbSectionLevel :: Int
+ , dbQuoteType :: QuoteType
+ , dbMeta :: Meta
+ , dbAcceptsMeta :: Bool
+ , dbBook :: Bool
+ , dbFigureTitle :: Inlines
+ , dbContent :: [Content]
+ } deriving Show
+
+instance Default DBState where
+ def = DBState{ dbSectionLevel = 0
+ , dbQuoteType = DoubleQuote
+ , dbMeta = mempty
+ , dbAcceptsMeta = False
+ , dbBook = False
+ , dbFigureTitle = mempty
+ , dbContent = [] }
+
+
+readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readDocBook _ inp = do
+ let tree = normalizeTree . parseXML . handleInstructions $ inp
+ (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
+ return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
+
+-- We treat <?asciidoc-br?> specially (issue #1236), converting it
+-- to <br/>, since xml-light doesn't parse the instruction correctly.
+-- Other xml instructions are simply removed from the input stream.
+handleInstructions :: String -> String
+handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
+handleInstructions xs = case break (=='<') xs of
+ (ys, []) -> ys
+ ([], '<':zs) -> '<' : handleInstructions zs
+ (ys, zs) -> ys ++ handleInstructions zs
+
+getFigure :: PandocMonad m => Element -> DB m Blocks
+getFigure e = do
+ tit <- case filterChild (named "title") e of
+ Just t -> getInlines t
+ Nothing -> return mempty
+ modify $ \st -> st{ dbFigureTitle = tit }
+ res <- getBlocks e
+ modify $ \st -> st{ dbFigureTitle = mempty }
+ return res
+
+-- normalize input, consolidating adjacent Text and CRef elements
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)
+ where go :: [Content] -> [Content]
+ go (Text (CData CDataRaw _ _):xs) = xs
+ go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+ Text (CData CDataText (s1 ++ s2) z):xs
+ go (Text (CData CDataText s1 z):CRef r:xs) =
+ Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ go (CRef r:Text (CData CDataText s1 z):xs) =
+ Text (CData CDataText (convertEntity r ++ s1) z):xs
+ go (CRef r1:CRef r2:xs) =
+ Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ go xs = xs
+
+convertEntity :: String -> String
+convertEntity e = maybe (map toUpper e) id (lookupEntity e)
+
+-- convenience function to get an attribute value, defaulting to ""
+attrValue :: String -> Element -> String
+attrValue attr elt =
+ case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
+ Just z -> z
+ Nothing -> ""
+
+-- convenience function
+named :: String -> Element -> Bool
+named s e = qName (elName e) == s
+
+--
+
+acceptingMetadata :: PandocMonad m => DB m a -> DB m a
+acceptingMetadata p = do
+ modify (\s -> s { dbAcceptsMeta = True } )
+ res <- p
+ modify (\s -> s { dbAcceptsMeta = False })
+ return res
+
+checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
+checkInMeta p = do
+ accepts <- dbAcceptsMeta <$> get
+ when accepts p
+ return mempty
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
+addMeta field val = modify (setMeta field val)
+
+instance HasMeta DBState where
+ setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)}
+ deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)}
+
+isBlockElement :: Content -> Bool
+isBlockElement (Elem e) = qName (elName e) `elem` blocktags
+ where blocktags = ["toc","index","para","formalpara","simpara",
+ "ackno","epigraph","blockquote","bibliography","bibliodiv",
+ "biblioentry","glossee","glosseealso","glossary",
+ "glossdiv","glosslist","chapter","appendix","preface",
+ "bridgehead","sect1","sect2","sect3","sect4","sect5","section",
+ "refsect1","refsect2","refsect3","refsection",
+ "important","caution","note","tip","warning","qandadiv",
+ "question","answer","abstract","itemizedlist","orderedlist",
+ "variablelist","article","book","table","informaltable",
+ "informalexample", "linegroup",
+ "screen","programlisting","example","calloutlist"]
+isBlockElement _ = False
+
+-- Trim leading and trailing newline characters
+trimNl :: String -> String
+trimNl = reverse . go . reverse . go
+ where go ('\n':xs) = xs
+ go xs = xs
+
+-- meld text into beginning of first paragraph of Blocks.
+-- assumes Blocks start with a Para; if not, does nothing.
+addToStart :: Inlines -> Blocks -> Blocks
+addToStart toadd bs =
+ case toList bs of
+ (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest
+ _ -> bs
+
+-- function that is used by both mediaobject (in parseBlock)
+-- and inlinemediaobject (in parseInline)
+-- A DocBook mediaobject is a wrapper around a set of alternative presentations
+getMediaobject :: PandocMonad m => Element -> DB m Inlines
+getMediaobject e = do
+ (imageUrl, attr) <-
+ case filterChild (named "imageobject") e of
+ Nothing -> return (mempty, nullAttr)
+ Just z -> case filterChild (named "imagedata") z of
+ Nothing -> return (mempty, nullAttr)
+ Just i -> let atVal a = attrValue a i
+ w = case atVal "width" of
+ "" -> []
+ d -> [("width", d)]
+ h = case atVal "depth" of
+ "" -> []
+ d -> [("height", d)]
+ atr = (atVal "id", words $ atVal "role", w ++ h)
+ in return (atVal "fileref", atr)
+ let getCaption el = case filterChild (\x -> named "caption" x
+ || named "textobject" x
+ || named "alt" x) el of
+ Nothing -> return mempty
+ Just z -> mconcat <$> (mapM parseInline $ elContent z)
+ figTitle <- gets dbFigureTitle
+ let (caption, title) = if isNull figTitle
+ then (getCaption e, "")
+ else (return figTitle, "fig:")
+ liftM (imageWith attr imageUrl title) caption
+
+getBlocks :: PandocMonad m => Element -> DB m Blocks
+getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+
+
+parseBlock :: PandocMonad m => Content -> DB m Blocks
+parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
+parseBlock (Text (CData _ s _)) = if all isSpace s
+ then return mempty
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+parseBlock (Elem e) =
+ case qName (elName e) of
+ "toc" -> return mempty -- skip TOC, since in pandoc it's autogenerated
+ "index" -> return mempty -- skip index, since page numbers meaningless
+ "para" -> parseMixed para (elContent e)
+ "formalpara" -> do
+ tit <- case filterChild (named "title") e of
+ Just t -> (para . strong . (<> str ".")) <$>
+ getInlines t
+ Nothing -> return mempty
+ (tit <>) <$> parseMixed para (elContent e)
+ "simpara" -> parseMixed para (elContent e)
+ "ackno" -> parseMixed para (elContent e)
+ "epigraph" -> parseBlockquote
+ "blockquote" -> parseBlockquote
+ "attribution" -> return mempty
+ "titleabbrev" -> return mempty
+ "authorinitials" -> return mempty
+ "title" -> checkInMeta getTitle
+ "author" -> checkInMeta getAuthor
+ "authorgroup" -> checkInMeta getAuthorGroup
+ "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
+ "date" -> checkInMeta getDate
+ "bibliography" -> sect 0
+ "bibliodiv" -> sect 1
+ "biblioentry" -> parseMixed para (elContent e)
+ "bibliomixed" -> parseMixed para (elContent e)
+ "glosssee" -> para . (\ils -> text "See " <> ils <> str ".")
+ <$> getInlines e
+ "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".")
+ <$> getInlines e
+ "glossary" -> sect 0
+ "glossdiv" -> definitionList <$>
+ mapM parseGlossEntry (filterChildren (named "glossentry") e)
+ "glosslist" -> definitionList <$>
+ mapM parseGlossEntry (filterChildren (named "glossentry") e)
+ "chapter" -> sect 0
+ "appendix" -> sect 0
+ "preface" -> sect 0
+ "bridgehead" -> para . strong <$> getInlines e
+ "sect1" -> sect 1
+ "sect2" -> sect 2
+ "sect3" -> sect 3
+ "sect4" -> sect 4
+ "sect5" -> sect 5
+ "section" -> gets dbSectionLevel >>= sect . (+1)
+ "refsect1" -> sect 1
+ "refsect2" -> sect 2
+ "refsect3" -> sect 3
+ "refsection" -> gets dbSectionLevel >>= sect . (+1)
+ "important" -> blockQuote . (para (strong $ str "Important") <>)
+ <$> getBlocks e
+ "caution" -> blockQuote . (para (strong $ str "Caution") <>)
+ <$> getBlocks e
+ "note" -> blockQuote . (para (strong $ str "Note") <>)
+ <$> getBlocks e
+ "tip" -> blockQuote . (para (strong $ str "Tip") <>)
+ <$> getBlocks e
+ "warning" -> blockQuote . (para (strong $ str "Warning") <>)
+ <$> getBlocks e
+ "area" -> return mempty
+ "areaset" -> return mempty
+ "areaspec" -> return mempty
+ "qandadiv" -> gets dbSectionLevel >>= sect . (+1)
+ "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e
+ "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e
+ "abstract" -> blockQuote <$> getBlocks e
+ "calloutlist" -> bulletList <$> callouts
+ "itemizedlist" -> bulletList <$> listitems
+ "orderedlist" -> do
+ let listStyle = case attrValue "numeration" e of
+ "arabic" -> Decimal
+ "loweralpha" -> LowerAlpha
+ "upperalpha" -> UpperAlpha
+ "lowerroman" -> LowerRoman
+ "upperroman" -> UpperRoman
+ _ -> Decimal
+ let start = fromMaybe 1 $
+ (attrValue "override" <$> filterElement (named "listitem") e)
+ >>= safeRead
+ orderedListWith (start,listStyle,DefaultDelim)
+ <$> listitems
+ "variablelist" -> definitionList <$> deflistitems
+ "figure" -> getFigure e
+ "mediaobject" -> para <$> getMediaobject e
+ "caption" -> return mempty
+ "info" -> metaBlock
+ "articleinfo" -> metaBlock
+ "sectioninfo" -> return mempty -- keywords & other metadata
+ "refsectioninfo" -> return mempty -- keywords & other metadata
+ "refsect1info" -> return mempty -- keywords & other metadata
+ "refsect2info" -> return mempty -- keywords & other metadata
+ "refsect3info" -> return mempty -- keywords & other metadata
+ "sect1info" -> return mempty -- keywords & other metadata
+ "sect2info" -> return mempty -- keywords & other metadata
+ "sect3info" -> return mempty -- keywords & other metadata
+ "sect4info" -> return mempty -- keywords & other metadata
+ "sect5info" -> return mempty -- keywords & other metadata
+ "chapterinfo" -> return mempty -- keywords & other metadata
+ "glossaryinfo" -> return mempty -- keywords & other metadata
+ "appendixinfo" -> return mempty -- keywords & other metadata
+ "bookinfo" -> metaBlock
+ "article" -> modify (\st -> st{ dbBook = False }) >>
+ getBlocks e
+ "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
+ "table" -> parseTable
+ "informaltable" -> parseTable
+ "informalexample" -> divWith ("", ["informalexample"], []) <$>
+ getBlocks e
+ "linegroup" -> lineBlock <$> lineItems
+ "literallayout" -> codeBlockWithLang
+ "screen" -> codeBlockWithLang
+ "programlisting" -> codeBlockWithLang
+ "?xml" -> return mempty
+ _ -> getBlocks e
+ where parseMixed container conts = do
+ let (ils,rest) = break isBlockElement conts
+ ils' <- (trimInlines . mconcat) <$> mapM parseInline ils
+ let p = if ils' == mempty then mempty else container ils'
+ case rest of
+ [] -> return p
+ (r:rs) -> do
+ b <- parseBlock r
+ x <- parseMixed container rs
+ return $ p <> b <> x
+ codeBlockWithLang = do
+ let classes' = case attrValue "language" e of
+ "" -> []
+ x -> [x]
+ return $ codeBlockWith (attrValue "id" e, classes', [])
+ $ trimNl $ strContentRecursive e
+ parseBlockquote = do
+ attrib <- case filterChild (named "attribution") e of
+ Nothing -> return mempty
+ Just z -> (para . (str "— " <>) . mconcat)
+ <$> (mapM parseInline $ elContent z)
+ contents <- getBlocks e
+ return $ blockQuote (contents <> attrib)
+ listitems = mapM getBlocks $ filterChildren (named "listitem") e
+ callouts = mapM getBlocks $ filterChildren (named "callout") e
+ deflistitems = mapM parseVarListEntry $ filterChildren
+ (named "varlistentry") e
+ parseVarListEntry e' = do
+ let terms = filterChildren (named "term") e'
+ let items = filterChildren (named "listitem") e'
+ terms' <- mapM getInlines terms
+ items' <- mapM getBlocks items
+ return (mconcat $ intersperse (str "; ") terms', items')
+ parseGlossEntry e' = do
+ let terms = filterChildren (named "glossterm") e'
+ let items = filterChildren (named "glossdef") e'
+ terms' <- mapM getInlines terms
+ items' <- mapM getBlocks items
+ return (mconcat $ intersperse (str "; ") terms', items')
+ getTitle = do
+ tit <- getInlines e
+ subtit <- case filterChild (named "subtitle") e of
+ Just s -> (text ": " <>) <$>
+ getInlines s
+ Nothing -> return mempty
+ addMeta "title" (tit <> subtit)
+
+ getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
+ getAuthorGroup = do
+ let terms = filterChildren (named "author") e
+ mapM getInlines terms >>= addMeta "author"
+ getDate = getInlines e >>= addMeta "date"
+ parseTable = do
+ let isCaption x = named "title" x || named "caption" x
+ caption <- case filterChild isCaption e of
+ Just t -> getInlines t
+ Nothing -> return mempty
+ let e' = fromMaybe e $ filterChild (named "tgroup") e
+ let isColspec x = named "colspec" x || named "col" x
+ let colspecs = case filterChild (named "colgroup") e' of
+ Just c -> filterChildren isColspec c
+ _ -> filterChildren isColspec e'
+ let isRow x = named "row" x || named "tr" x
+ headrows <- case filterChild (named "thead") e' of
+ Just h -> case filterChild isRow h of
+ Just x -> parseRow x
+ Nothing -> return []
+ Nothing -> return []
+ bodyrows <- case filterChild (named "tbody") e' of
+ Just b -> mapM parseRow
+ $ filterChildren isRow b
+ Nothing -> mapM parseRow
+ $ filterChildren isRow e'
+ let toAlignment c = case findAttr (unqual "align") c of
+ Just "left" -> AlignLeft
+ Just "right" -> AlignRight
+ Just "center" -> AlignCenter
+ _ -> AlignDefault
+ let toWidth c = case findAttr (unqual "colwidth") c of
+ Just w -> fromMaybe 0
+ $ safeRead $ '0': filter (\x ->
+ (x >= '0' && x <= '9')
+ || x == '.') w
+ Nothing -> 0 :: Double
+ let numrows = case bodyrows of
+ [] -> 0
+ xs -> maximum $ map length xs
+ let aligns = case colspecs of
+ [] -> replicate numrows AlignDefault
+ cs -> map toAlignment cs
+ let widths = case colspecs of
+ [] -> replicate numrows 0
+ cs -> let ws = map toWidth cs
+ tot = sum ws
+ in if all (> 0) ws
+ then map (/ tot) ws
+ else replicate numrows 0
+ let headrows' = if null headrows
+ then replicate numrows mempty
+ else headrows
+ return $ table caption (zip aligns widths)
+ headrows' bodyrows
+ isEntry x = named "entry" x || named "td" x || named "th" x
+ parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
+ sect n = do isbook <- gets dbBook
+ let n' = if isbook || n == 0 then n + 1 else n
+ headerText <- case filterChild (named "title") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "title")) of
+ Just t -> getInlines t
+ Nothing -> return mempty
+ modify $ \st -> st{ dbSectionLevel = n }
+ b <- getBlocks e
+ let ident = attrValue "id" e
+ modify $ \st -> st{ dbSectionLevel = n - 1 }
+ return $ headerWith (ident,[],[]) n' headerText <> b
+ lineItems = mapM getInlines $ filterChildren (named "line") e
+ metaBlock = acceptingMetadata (getBlocks e) >> return mempty
+
+getInlines :: PandocMonad m => Element -> DB m Inlines
+getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
+
+strContentRecursive :: Element -> String
+strContentRecursive = strContent .
+ (\e' -> e'{ elContent = map elementToStr $ elContent e' })
+
+elementToStr :: Content -> Content
+elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+elementToStr x = x
+
+parseInline :: PandocMonad m => Content -> DB m Inlines
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) =
+ return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref
+parseInline (Elem e) =
+ case qName (elName e) of
+ "equation" -> equation displayMath
+ "informalequation" -> equation displayMath
+ "inlineequation" -> equation math
+ "subscript" -> subscript <$> innerInlines
+ "superscript" -> superscript <$> innerInlines
+ "inlinemediaobject" -> getMediaobject e
+ "quote" -> do
+ qt <- gets dbQuoteType
+ let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
+ modify $ \st -> st{ dbQuoteType = qt' }
+ contents <- innerInlines
+ modify $ \st -> st{ dbQuoteType = qt }
+ return $ if qt == SingleQuote
+ then singleQuoted contents
+ else doubleQuoted contents
+ "simplelist" -> simpleList
+ "segmentedlist" -> segmentedList
+ "classname" -> codeWithLang
+ "code" -> codeWithLang
+ "filename" -> codeWithLang
+ "literal" -> codeWithLang
+ "computeroutput" -> codeWithLang
+ "prompt" -> codeWithLang
+ "parameter" -> codeWithLang
+ "option" -> codeWithLang
+ "optional" -> do x <- getInlines e
+ return $ str "[" <> x <> str "]"
+ "markup" -> codeWithLang
+ "wordasword" -> emph <$> innerInlines
+ "command" -> codeWithLang
+ "varname" -> codeWithLang
+ "function" -> codeWithLang
+ "type" -> codeWithLang
+ "symbol" -> codeWithLang
+ "constant" -> codeWithLang
+ "userinput" -> codeWithLang
+ "varargs" -> return $ code "(...)"
+ "keycap" -> return (str $ strContent e)
+ "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
+ "menuchoice" -> menuchoice <$> (mapM parseInline $
+ filter isGuiMenu $ elContent e)
+ "xref" -> do
+ content <- dbContent <$> get
+ let linkend = attrValue "linkend" e
+ let title = case attrValue "endterm" e of
+ "" -> maybe "???" xrefTitleByElem
+ (findElementById linkend content)
+ endterm -> maybe "???" strContent
+ (findElementById endterm content)
+ return $ link ('#' : linkend) "" (text title)
+ "email" -> return $ link ("mailto:" ++ strContent e) ""
+ $ str $ strContent e
+ "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ "ulink" -> link (attrValue "url" e) "" <$> innerInlines
+ "link" -> do
+ ils <- innerInlines
+ let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ Just h -> h
+ _ -> ('#' : attrValue "linkend" e)
+ let ils' = if ils == mempty then str href else ils
+ let attr = (attrValue "id" e, words $ attrValue "role" e, [])
+ return $ linkWith attr href "" ils'
+ "foreignphrase" -> emph <$> innerInlines
+ "emphasis" -> case attrValue "role" e of
+ "bold" -> strong <$> innerInlines
+ "strong" -> strong <$> innerInlines
+ "strikethrough" -> strikeout <$> innerInlines
+ _ -> emph <$> innerInlines
+ "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
+ "title" -> return mempty
+ "affiliation" -> return mempty
+ -- Note: this isn't a real docbook tag; it's what we convert
+ -- <?asciidor-br?> to in handleInstructions, above. A kludge to
+ -- work around xml-light's inability to parse an instruction.
+ "br" -> return linebreak
+ _ -> innerInlines
+ where innerInlines = (trimInlines . mconcat) <$>
+ (mapM parseInline $ elContent e)
+ equation constructor = return $ mconcat $
+ map (constructor . writeTeX)
+ $ rights
+ $ map (readMathML . showElement . everywhere (mkT removePrefix))
+ $ filterChildren (\x -> qName (elName x) == "math" &&
+ qPrefix (elName x) == Just "mml") e
+ removePrefix elname = elname { qPrefix = Nothing }
+ codeWithLang = do
+ let classes' = case attrValue "language" e of
+ "" -> []
+ l -> [l]
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+ simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
+ (filterChildren (named "member") e)
+ segmentedList = do
+ tit <- maybe (return mempty) getInlines $ filterChild (named "title") e
+ segtits <- mapM getInlines $ filterChildren (named "segtitle") e
+ segitems <- mapM (mapM getInlines . filterChildren (named "seg"))
+ $ filterChildren (named "seglistitem") e
+ let toSeg = mconcat . zipWith (\x y -> strong (x <> str ":") <> space <>
+ y <> linebreak) segtits
+ let segs = mconcat $ map toSeg segitems
+ let tit' = if tit == mempty
+ then mempty
+ else strong tit <> linebreak
+ return $ linebreak <> tit' <> segs
+ keycombo = spanWith ("",["keycombo"],[]) .
+ mconcat . intersperse (str "+")
+ menuchoice = spanWith ("",["menuchoice"],[]) .
+ mconcat . intersperse (text " > ")
+ isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x ||
+ named "guimenuitem" x
+ isGuiMenu _ = False
+
+ findElementById idString content
+ = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content]
+
+ -- Use the 'xreflabel' attribute for getting the title of a xref link;
+ -- if there's no such attribute, employ some heuristics based on what
+ -- docbook-xsl does.
+ xrefTitleByElem el
+ | not (null xrefLabel) = xrefLabel
+ | otherwise = case qName (elName el) of
+ "chapter" -> descendantContent "title" el
+ "sect1" -> descendantContent "title" el
+ "sect2" -> descendantContent "title" el
+ "sect3" -> descendantContent "title" el
+ "sect4" -> descendantContent "title" el
+ "sect5" -> descendantContent "title" el
+ "cmdsynopsis" -> descendantContent "command" el
+ "funcsynopsis" -> descendantContent "function" el
+ _ -> qName (elName el) ++ "_title"
+ where
+ xrefLabel = attrValue "xreflabel" el
+ descendantContent name = maybe "???" strContent
+ . filterElementName (\n -> qName n == name)
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
new file mode 100644
index 000000000..8936a0403
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -0,0 +1,626 @@
+{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-}
+
+{-
+Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx
+ Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
+to 'Pandoc' document. -}
+
+{-
+Current state of implementation of Docx entities ([x] means
+implemented, [-] means partially implemented):
+
+* Blocks
+
+ - [X] Para
+ - [X] CodeBlock (styled with `SourceCode`)
+ - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
+ indented)
+ - [X] OrderedList
+ - [X] BulletList
+ - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
+ - [X] Header (styled with `Heading#`)
+ - [ ] HorizontalRule
+ - [-] Table (column widths and alignments not yet implemented)
+
+* Inlines
+
+ - [X] Str
+ - [X] Emph (italics and underline both read as Emph)
+ - [X] Strong
+ - [X] Strikeout
+ - [X] Superscript
+ - [X] Subscript
+ - [X] SmallCaps
+ - [ ] Quoted
+ - [ ] Cite
+ - [X] Code (styled with `VerbatimChar`)
+ - [X] Space
+ - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
+ - [X] Math
+ - [X] Link (links to an arbitrary bookmark create a span with the target as
+ id and "anchor" class)
+ - [X] Image
+ - [X] Note (Footnotes and Endnotes are silently combined.)
+-}
+
+module Text.Pandoc.Readers.Docx
+ ( readDocxWithWarnings
+ , readDocx
+ ) where
+
+import Codec.Archive.Zip
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Builder
+import Text.Pandoc.Walk
+import Text.Pandoc.Readers.Docx.Parse
+import Text.Pandoc.Readers.Docx.Lists
+import Text.Pandoc.Readers.Docx.Combine
+import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.List (delete, intersect)
+import Text.TeXMath (writeTeX)
+import Data.Default (Default)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Control.Monad.Reader
+import Control.Monad.State
+import Data.Sequence (ViewL(..), viewl)
+import qualified Data.Sequence as Seq (null)
+#if !(MIN_VERSION_base(4,8,0))
+import Data.Traversable (traverse)
+#endif
+import Text.Pandoc.Error
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Logging
+
+readDocx :: PandocMonad m
+ => ReaderOptions
+ -> B.ByteString
+ -> m Pandoc
+readDocx opts bytes
+ | Right archive <- toArchiveOrFail bytes
+ , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
+ mapM_ (P.report . DocxParserWarning) parserWarnings
+ (meta, blks) <- docxToOutput opts docx
+ return $ Pandoc meta blks
+readDocx _ _ =
+ throwError $ PandocSomeError "couldn't parse docx file"
+
+-- TODO remove this for 2.0:
+readDocxWithWarnings :: PandocMonad m
+ => ReaderOptions
+ -> B.ByteString
+ -> m Pandoc
+readDocxWithWarnings = readDocx
+
+data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag
+ , docxDropCap :: Inlines
+ , docxWarnings :: [String]
+ }
+
+instance Default DState where
+ def = DState { docxAnchorMap = M.empty
+ , docxMediaBag = mempty
+ , docxDropCap = mempty
+ , docxWarnings = []
+ }
+
+data DEnv = DEnv { docxOptions :: ReaderOptions
+ , docxInHeaderBlock :: Bool }
+
+instance Default DEnv where
+ def = DEnv def False
+
+type DocxContext m = ReaderT DEnv (StateT DState m)
+
+evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
+evalDocxContext ctx env st = flip evalStateT st $ flip runReaderT env $ ctx
+
+-- This is empty, but we put it in for future-proofing.
+spansToKeep :: [String]
+spansToKeep = []
+
+divsToKeep :: [String]
+divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+
+metaStyles :: M.Map String String
+metaStyles = M.fromList [ ("Title", "title")
+ , ("Subtitle", "subtitle")
+ , ("Author", "author")
+ , ("Date", "date")
+ , ("Abstract", "abstract")]
+
+sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
+sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+
+isMetaPar :: BodyPart -> Bool
+isMetaPar (Paragraph pPr _) =
+ not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+isMetaPar _ = False
+
+isEmptyPar :: BodyPart -> Bool
+isEmptyPar (Paragraph _ parParts) =
+ all isEmptyParPart parParts
+ where
+ isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
+ isEmptyParPart _ = False
+ isEmptyElem (TextRun s) = trim s == ""
+ isEmptyElem _ = True
+isEmptyPar _ = False
+
+bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
+bodyPartsToMeta' [] = return M.empty
+bodyPartsToMeta' (bp : bps)
+ | (Paragraph pPr parParts) <- bp
+ , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+ , (Just metaField) <- M.lookup c metaStyles = do
+ inlines <- smushInlines <$> mapM parPartToInlines parParts
+ remaining <- bodyPartsToMeta' bps
+ let
+ f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
+ f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+ f m (MetaList mv) = MetaList (m : mv)
+ f m n = MetaList [m, n]
+ return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
+bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
+
+bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
+bodyPartsToMeta bps = do
+ mp <- bodyPartsToMeta' bps
+ let mp' =
+ case M.lookup "author" mp of
+ Just mv -> M.insert "author" (fixAuthors mv) mp
+ Nothing -> mp
+ return $ Meta mp'
+
+fixAuthors :: MetaValue -> MetaValue
+fixAuthors (MetaBlocks blks) =
+ MetaList $ map g $ filter f blks
+ where f (Para _) = True
+ f _ = False
+ g (Para ils) = MetaInlines ils
+ g _ = MetaInlines []
+fixAuthors mv = mv
+
+codeStyles :: [String]
+codeStyles = ["VerbatimChar"]
+
+codeDivs :: [String]
+codeDivs = ["SourceCode"]
+
+runElemToInlines :: RunElem -> Inlines
+runElemToInlines (TextRun s) = text s
+runElemToInlines (LnBrk) = linebreak
+runElemToInlines (Tab) = space
+runElemToInlines (SoftHyphen) = text "\xad"
+runElemToInlines (NoBreakHyphen) = text "\x2011"
+
+runElemToString :: RunElem -> String
+runElemToString (TextRun s) = s
+runElemToString (LnBrk) = ['\n']
+runElemToString (Tab) = ['\t']
+runElemToString (SoftHyphen) = ['\xad']
+runElemToString (NoBreakHyphen) = ['\x2011']
+
+runToString :: Run -> String
+runToString (Run _ runElems) = concatMap runElemToString runElems
+runToString _ = ""
+
+parPartToString :: ParPart -> String
+parPartToString (PlainRun run) = runToString run
+parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
+parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
+parPartToString _ = ""
+
+blacklistedCharStyles :: [String]
+blacklistedCharStyles = ["Hyperlink"]
+
+resolveDependentRunStyle :: RunStyle -> RunStyle
+resolveDependentRunStyle rPr
+ | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
+ rPr
+ | Just (_, cs) <- rStyle rPr =
+ let rPr' = resolveDependentRunStyle cs
+ in
+ RunStyle { isBold = case isBold rPr of
+ Just bool -> Just bool
+ Nothing -> isBold rPr'
+ , isItalic = case isItalic rPr of
+ Just bool -> Just bool
+ Nothing -> isItalic rPr'
+ , isSmallCaps = case isSmallCaps rPr of
+ Just bool -> Just bool
+ Nothing -> isSmallCaps rPr'
+ , isStrike = case isStrike rPr of
+ Just bool -> Just bool
+ Nothing -> isStrike rPr'
+ , rVertAlign = case rVertAlign rPr of
+ Just valign -> Just valign
+ Nothing -> rVertAlign rPr'
+ , rUnderline = case rUnderline rPr of
+ Just ulstyle -> Just ulstyle
+ Nothing -> rUnderline rPr'
+ , rStyle = rStyle rPr }
+ | otherwise = rPr
+
+runStyleToTransform :: RunStyle -> (Inlines -> Inlines)
+runStyleToTransform rPr
+ | Just (s, _) <- rStyle rPr
+ , s `elem` spansToKeep =
+ let rPr' = rPr{rStyle = Nothing}
+ in
+ (spanWith ("", [s], [])) . (runStyleToTransform rPr')
+ | Just True <- isItalic rPr =
+ emph . (runStyleToTransform rPr {isItalic = Nothing})
+ | Just True <- isBold rPr =
+ strong . (runStyleToTransform rPr {isBold = Nothing})
+ | Just True <- isSmallCaps rPr =
+ smallcaps . (runStyleToTransform rPr {isSmallCaps = Nothing})
+ | Just True <- isStrike rPr =
+ strikeout . (runStyleToTransform rPr {isStrike = Nothing})
+ | Just SupScrpt <- rVertAlign rPr =
+ superscript . (runStyleToTransform rPr {rVertAlign = Nothing})
+ | Just SubScrpt <- rVertAlign rPr =
+ subscript . (runStyleToTransform rPr {rVertAlign = Nothing})
+ | Just "single" <- rUnderline rPr =
+ emph . (runStyleToTransform rPr {rUnderline = Nothing})
+ | otherwise = id
+
+runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
+runToInlines (Run rs runElems)
+ | Just (s, _) <- rStyle rs
+ , s `elem` codeStyles =
+ let rPr = resolveDependentRunStyle rs
+ codeString = code $ concatMap runElemToString runElems
+ in
+ return $ case rVertAlign rPr of
+ Just SupScrpt -> superscript codeString
+ Just SubScrpt -> subscript codeString
+ _ -> codeString
+ | otherwise = do
+ let ils = smushInlines (map runElemToInlines runElems)
+ return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
+runToInlines (Footnote bps) = do
+ blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
+ return $ note blksList
+runToInlines (Endnote bps) = do
+ blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps)
+ return $ note blksList
+runToInlines (InlineDrawing fp title alt bs ext) = do
+ (lift . lift) $ P.insertMedia fp Nothing bs
+ return $ imageWith (extentToAttr ext) fp title $ text alt
+runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
+
+extentToAttr :: Extent -> Attr
+extentToAttr (Just (w, h)) =
+ ("", [], [("width", showDim w), ("height", showDim h)] )
+ where
+ showDim d = show (d / 914400) ++ "in"
+extentToAttr _ = nullAttr
+
+blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
+blocksToInlinesWarn cmtId blks = do
+ let blkList = toList blks
+ notParaOrPlain :: Block -> Bool
+ notParaOrPlain (Para _) = False
+ notParaOrPlain (Plain _) = False
+ notParaOrPlain _ = True
+ when (not $ null $ filter notParaOrPlain blkList) $
+ lift $ P.report $ DocxParserWarning $
+ "Docx comment " ++ cmtId ++ " will not retain formatting"
+ return $ fromList $ blocksToInlines blkList
+
+parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
+parPartToInlines (PlainRun r) = runToInlines r
+parPartToInlines (Insertion _ author date runs) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AcceptChanges -> smushInlines <$> mapM runToInlines runs
+ RejectChanges -> return mempty
+ AllChanges -> do
+ ils <- smushInlines <$> mapM runToInlines runs
+ let attr = ("", ["insertion"], [("author", author), ("date", date)])
+ return $ spanWith attr ils
+parPartToInlines (Deletion _ author date runs) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AcceptChanges -> return mempty
+ RejectChanges -> smushInlines <$> mapM runToInlines runs
+ AllChanges -> do
+ ils <- smushInlines <$> mapM runToInlines runs
+ let attr = ("", ["deletion"], [("author", author), ("date", date)])
+ return $ spanWith attr ils
+parPartToInlines (CommentStart cmtId author date bodyParts) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AllChanges -> do
+ blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
+ ils <- blocksToInlinesWarn cmtId blks
+ let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
+ return $ spanWith attr ils
+ _ -> return mempty
+parPartToInlines (CommentEnd cmtId) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AllChanges -> do
+ let attr = ("", ["comment-end"], [("id", cmtId)])
+ return $ spanWith attr mempty
+ _ -> return mempty
+parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
+ return mempty
+parPartToInlines (BookMark _ anchor) =
+ -- We record these, so we can make sure not to overwrite
+ -- user-defined anchor links with header auto ids.
+ do
+ -- get whether we're in a header.
+ inHdrBool <- asks docxInHeaderBlock
+ -- Get the anchor map.
+ anchorMap <- gets docxAnchorMap
+ -- We don't want to rewrite if we're in a header, since we'll take
+ -- care of that later, when we make the header anchor. If the
+ -- bookmark were already in uniqueIdent form, this would lead to a
+ -- duplication. Otherwise, we check to see if the id is already in
+ -- there. Rewrite if necessary. This will have the possible effect
+ -- of rewriting user-defined anchor links. However, since these
+ -- are not defined in pandoc, it seems like a necessary evil to
+ -- avoid an extra pass.
+ let newAnchor =
+ if not inHdrBool && anchor `elem` (M.elems anchorMap)
+ then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
+ else anchor
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
+ return $ spanWith (newAnchor, ["anchor"], []) mempty
+parPartToInlines (Drawing fp title alt bs ext) = do
+ (lift . lift) $ P.insertMedia fp Nothing bs
+ return $ imageWith (extentToAttr ext) fp title $ text alt
+parPartToInlines Chart = do
+ return $ spanWith ("", ["chart"], []) $ text "[CHART]"
+parPartToInlines (InternalHyperLink anchor runs) = do
+ ils <- smushInlines <$> mapM runToInlines runs
+ return $ link ('#' : anchor) "" ils
+parPartToInlines (ExternalHyperLink target runs) = do
+ ils <- smushInlines <$> mapM runToInlines runs
+ return $ link target "" ils
+parPartToInlines (PlainOMath exps) = do
+ return $ math $ writeTeX exps
+parPartToInlines (SmartTag runs) = do
+ ils <- smushInlines <$> mapM runToInlines runs
+ return ils
+
+isAnchorSpan :: Inline -> Bool
+isAnchorSpan (Span (_, classes, kvs) _) =
+ classes == ["anchor"] &&
+ null kvs
+isAnchorSpan _ = False
+
+dummyAnchors :: [String]
+dummyAnchors = ["_GoBack"]
+
+makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
+makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
+
+makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
+-- If there is an anchor already there (an anchor span in the header,
+-- to be exact), we rename and associate the new id with the old one.
+makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
+ | (c:_) <- filter isAnchorSpan ils
+ , (Span (anchIdent, ["anchor"], _) cIls) <- c = do
+ hdrIDMap <- gets docxAnchorMap
+ let newIdent = if null ident
+ then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
+ else ident
+ newIls = concatMap f ils where f il | il == c = cIls
+ | otherwise = [il]
+ modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
+ makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
+-- Otherwise we just give it a name, and register that name (associate
+-- it with itself.)
+makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
+ do
+ hdrIDMap <- gets docxAnchorMap
+ let newIdent = if null ident
+ then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
+ else ident
+ modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
+ return $ Header n (newIdent, classes, kvs) ils
+makeHeaderAnchor' blk = return blk
+
+-- Rewrite a standalone paragraph block as a plain
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain blks
+ | (Para (ils) :< seeq) <- viewl $ unMany blks
+ , Seq.null seeq =
+ singleton $ Plain ils
+singleParaToPlain blks = blks
+
+cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
+cellToBlocks (Cell bps) = do
+ blks <- smushBlocks <$> mapM bodyPartToBlocks bps
+ return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+
+rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
+rowToBlocksList (Row cells) = do
+ blksList <- mapM cellToBlocks cells
+ return $ map singleParaToPlain blksList
+
+trimLineBreaks :: [Inline] -> [Inline]
+trimLineBreaks [] = []
+trimLineBreaks (LineBreak : ils) = trimLineBreaks ils
+trimLineBreaks ils
+ | (LineBreak : ils') <- reverse ils = trimLineBreaks (reverse ils')
+trimLineBreaks ils = ils
+
+parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks)
+parStyleToTransform pPr
+ | (c:cs) <- pStyle pPr
+ , c `elem` divsToKeep =
+ let pPr' = pPr { pStyle = cs }
+ in
+ (divWith ("", [c], [])) . (parStyleToTransform pPr')
+ | (c:cs) <- pStyle pPr,
+ c `elem` listParagraphDivs =
+ let pPr' = pPr { pStyle = cs, indentation = Nothing}
+ in
+ (divWith ("", [c], [])) . (parStyleToTransform pPr')
+ | (_:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr =
+ let pPr' = pPr { pStyle = cs }
+ in
+ blockQuote . (parStyleToTransform pPr')
+ | (_:cs) <- pStyle pPr =
+ let pPr' = pPr { pStyle = cs}
+ in
+ parStyleToTransform pPr'
+ | null (pStyle pPr)
+ , Just left <- indentation pPr >>= leftParIndent
+ , Just hang <- indentation pPr >>= hangingParIndent =
+ let pPr' = pPr { indentation = Nothing }
+ in
+ case (left - hang) > 0 of
+ True -> blockQuote . (parStyleToTransform pPr')
+ False -> parStyleToTransform pPr'
+ | null (pStyle pPr),
+ Just left <- indentation pPr >>= leftParIndent =
+ let pPr' = pPr { indentation = Nothing }
+ in
+ case left > 0 of
+ True -> blockQuote . (parStyleToTransform pPr')
+ False -> parStyleToTransform pPr'
+parStyleToTransform _ = id
+
+bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
+bodyPartToBlocks (Paragraph pPr parparts)
+ | not $ null $ codeDivs `intersect` (pStyle pPr) =
+ return
+ $ parStyleToTransform pPr
+ $ codeBlock
+ $ concatMap parPartToString parparts
+ | Just (style, n) <- pHeading pPr = do
+ ils <- local (\s-> s{docxInHeaderBlock=True}) $
+ (smushInlines <$> mapM parPartToInlines parparts)
+ makeHeaderAnchor $
+ headerWith ("", delete style (pStyle pPr), []) n ils
+ | otherwise = do
+ ils <- smushInlines <$> mapM parPartToInlines parparts >>=
+ (return . fromList . trimLineBreaks . normalizeSpaces . toList)
+ dropIls <- gets docxDropCap
+ let ils' = dropIls <> ils
+ if dropCap pPr
+ then do modify $ \s -> s { docxDropCap = ils' }
+ return mempty
+ else do modify $ \s -> s { docxDropCap = mempty }
+ return $ case isNull ils' of
+ True -> mempty
+ _ -> parStyleToTransform pPr $ para ils'
+bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
+ let
+ kvs = case levelInfo of
+ (_, fmt, txt, Just start) -> [ ("level", lvl)
+ , ("num-id", numId)
+ , ("format", fmt)
+ , ("text", txt)
+ , ("start", (show start))
+ ]
+
+ (_, fmt, txt, Nothing) -> [ ("level", lvl)
+ , ("num-id", numId)
+ , ("format", fmt)
+ , ("text", txt)
+ ]
+ blks <- bodyPartToBlocks (Paragraph pPr parparts)
+ return $ divWith ("", ["list-item"], kvs) blks
+bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
+ let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)}
+ in
+ bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (Tbl _ _ _ []) =
+ return $ para mempty
+bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
+ let caption = text cap
+ (hdr, rows) = case firstRowFormatting look of
+ True | null rs -> (Nothing, [r])
+ | otherwise -> (Just r, rs)
+ False -> (Nothing, r:rs)
+
+ cells <- mapM rowToBlocksList rows
+
+ let width = case cells of
+ r':_ -> length r'
+ -- shouldn't happen
+ [] -> 0
+
+ hdrCells <- case hdr of
+ Just r' -> rowToBlocksList r'
+ Nothing -> return $ replicate width mempty
+
+ -- The two following variables (horizontal column alignment and
+ -- relative column widths) go to the default at the
+ -- moment. Width information is in the TblGrid field of the Tbl,
+ -- so should be possible. Alignment might be more difficult,
+ -- since there doesn't seem to be a column entity in docx.
+ let alignments = replicate width AlignDefault
+ widths = replicate width 0 :: [Double]
+
+ return $ table caption (zip alignments widths) hdrCells cells
+bodyPartToBlocks (OMathPara e) = do
+ return $ para $ displayMath (writeTeX e)
+
+
+-- replace targets with generated anchors.
+rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
+rewriteLink' l@(Link attr ils ('#':target, title)) = do
+ anchorMap <- gets docxAnchorMap
+ return $ case M.lookup target anchorMap of
+ Just newTarget -> (Link attr ils ('#':newTarget, title))
+ Nothing -> l
+rewriteLink' il = return il
+
+rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
+rewriteLinks = mapM (walkM rewriteLink')
+
+bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
+bodyToOutput (Body bps) = do
+ let (metabps, blkbps) = sepBodyParts bps
+ meta <- bodyPartsToMeta metabps
+ blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
+ blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
+ return $ (meta, blks')
+
+docxToOutput :: PandocMonad m
+ => ReaderOptions
+ -> Docx
+ -> m (Meta, [Block])
+docxToOutput opts (Docx (Document _ body)) =
+ let dEnv = def { docxOptions = opts} in
+ evalDocxContext (bodyToOutput body) dEnv def
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
new file mode 100644
index 000000000..39e0df825
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -0,0 +1,154 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
+ PatternGuards #-}
+
+module Text.Pandoc.Readers.Docx.Combine ( smushInlines
+ , smushBlocks
+ )
+ where
+
+import Text.Pandoc.Builder
+import Data.List
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>))
+import qualified Data.Sequence as Seq (null)
+
+data Modifier a = Modifier (a -> a)
+ | AttrModifier (Attr -> a -> a) Attr
+ | NullModifier
+
+spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
+spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
+ where (l, m, r) = spaceOutInlines ms
+ (fs, m') = unstackInlines m
+
+spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
+spaceOutInlines ils =
+ let (fs, ils') = unstackInlines ils
+ contents = unMany ils'
+ left = case viewl contents of
+ (Space :< _) -> space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> space
+ _ -> mempty in
+ (left, (stackInlines fs $ trimInlines . Many $ contents), right)
+
+stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
+stackInlines [] ms = ms
+stackInlines (NullModifier : fs) ms = stackInlines fs ms
+stackInlines ((Modifier f) : fs) ms =
+ if isEmpty ms
+ then stackInlines fs ms
+ else f $ stackInlines fs ms
+stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms
+
+unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
+unstackInlines ms = case ilModifier ms of
+ NullModifier -> ([], ms)
+ _ -> (f : fs, ms') where
+ f = ilModifier ms
+ (fs, ms') = unstackInlines $ ilInnards ms
+
+ilModifier :: Inlines -> Modifier Inlines
+ilModifier ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph _) -> Modifier emph
+ (Strong _) -> Modifier strong
+ (SmallCaps _) -> Modifier smallcaps
+ (Strikeout _) -> Modifier strikeout
+ (Superscript _) -> Modifier superscript
+ (Subscript _) -> Modifier subscript
+ (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
+ (Span attr _) -> AttrModifier spanWith attr
+ _ -> NullModifier
+ _ -> NullModifier
+
+ilInnards :: Inlines -> Inlines
+ilInnards ils = case viewl (unMany ils) of
+ (x :< xs) | Seq.null xs -> case x of
+ (Emph lst) -> fromList lst
+ (Strong lst) -> fromList lst
+ (SmallCaps lst) -> fromList lst
+ (Strikeout lst) -> fromList lst
+ (Superscript lst) -> fromList lst
+ (Subscript lst) -> fromList lst
+ (Link _ lst _) -> fromList lst
+ (Span _ lst) -> fromList lst
+ _ -> ils
+ _ -> ils
+
+inlinesL :: Inlines -> (Inlines, Inlines)
+inlinesL ils = case viewl $ unMany ils of
+ (s :< sq) -> (singleton s, Many sq)
+ _ -> (mempty, ils)
+
+inlinesR :: Inlines -> (Inlines, Inlines)
+inlinesR ils = case viewr $ unMany ils of
+ (sq :> s) -> (Many sq, singleton s)
+ _ -> (ils, mempty)
+
+combineInlines :: Inlines -> Inlines -> Inlines
+combineInlines x y =
+ let (xs', x') = inlinesR x
+ (y', ys') = inlinesL y
+ in
+ xs' <> (combineSingletonInlines x' y') <> ys'
+
+combineSingletonInlines :: Inlines -> Inlines -> Inlines
+combineSingletonInlines x y =
+ let (xfs, xs) = unstackInlines x
+ (yfs, ys) = unstackInlines y
+ shared = xfs `intersect` yfs
+ x_remaining = xfs \\ shared
+ y_remaining = yfs \\ shared
+ x_rem_attr = filter isAttrModifier x_remaining
+ y_rem_attr = filter isAttrModifier y_remaining
+ in
+ case null shared of
+ True | isEmpty xs && isEmpty ys ->
+ stackInlines (x_rem_attr ++ y_rem_attr) mempty
+ | isEmpty xs ->
+ let (sp, y') = spaceOutInlinesL y in
+ (stackInlines x_rem_attr mempty) <> sp <> y'
+ | isEmpty ys ->
+ let (x', sp) = spaceOutInlinesR x in
+ x' <> sp <> (stackInlines y_rem_attr mempty)
+ | otherwise ->
+ let (x', xsp) = spaceOutInlinesR x
+ (ysp, y') = spaceOutInlinesL y
+ in
+ x' <> xsp <> ysp <> y'
+ False -> stackInlines shared $
+ combineInlines
+ (stackInlines x_remaining xs)
+ (stackInlines y_remaining ys)
+
+combineBlocks :: Blocks -> Blocks -> Blocks
+combineBlocks bs cs
+ | bs' :> (BlockQuote bs'') <- viewr (unMany bs)
+ , (BlockQuote cs'') :< cs' <- viewl (unMany cs) =
+ Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs'
+combineBlocks bs cs = bs <> cs
+
+instance (Monoid a, Eq a) => Eq (Modifier a) where
+ (Modifier f) == (Modifier g) = (f mempty == g mempty)
+ (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty)
+ (NullModifier) == (NullModifier) = True
+ _ == _ = False
+
+isEmpty :: (Monoid a, Eq a) => a -> Bool
+isEmpty x = x == mempty
+
+isAttrModifier :: Modifier a -> Bool
+isAttrModifier (AttrModifier _ _) = True
+isAttrModifier _ = False
+
+smushInlines :: [Inlines] -> Inlines
+smushInlines xs = foldl combineInlines mempty xs
+
+smushBlocks :: [Blocks] -> Blocks
+smushBlocks xs = foldl combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
new file mode 100644
index 000000000..395a53907
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -0,0 +1,229 @@
+{-
+Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Lists
+ Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for converting flat docx paragraphs into nested lists.
+-}
+
+module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
+ , blocksToDefinitions
+ , listParagraphDivs
+ ) where
+
+import Text.Pandoc.JSON
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.Shared (trim)
+import Control.Monad
+import Data.List
+import Data.Maybe
+
+isListItem :: Block -> Bool
+isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True
+isListItem _ = False
+
+getLevel :: Block -> Maybe Integer
+getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs
+getLevel _ = Nothing
+
+getLevelN :: Block -> Integer
+getLevelN b = case getLevel b of
+ Just n -> n
+ Nothing -> -1
+
+getNumId :: Block -> Maybe Integer
+getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs
+getNumId _ = Nothing
+
+getNumIdN :: Block -> Integer
+getNumIdN b = case getNumId b of
+ Just n -> n
+ Nothing -> -1
+
+getText :: Block -> Maybe String
+getText (Div (_, _, kvs) _) = lookup "text" kvs
+getText _ = Nothing
+
+data ListType = Itemized | Enumerated ListAttributes
+
+listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap = [("upperLetter", UpperAlpha),
+ ("lowerLetter", LowerAlpha),
+ ("upperRoman", UpperRoman),
+ ("lowerRoman", LowerRoman),
+ ("decimal", Decimal)]
+
+listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap = [("%1)", OneParen),
+ ("(%1)", TwoParens),
+ ("%1.", Period)]
+
+getListType :: Block -> Maybe ListType
+getListType b@(Div (_, _, kvs) _) | isListItem b =
+ let
+ start = lookup "start" kvs
+ frmt = lookup "format" kvs
+ txt = lookup "text" kvs
+ in
+ case frmt of
+ Just "bullet" -> Just Itemized
+ Just f ->
+ case txt of
+ Just t -> Just $ Enumerated (
+ read (fromMaybe "1" start) :: Int,
+ fromMaybe DefaultStyle (lookup f listStyleMap),
+ fromMaybe DefaultDelim (lookup t listDelimMap))
+ Nothing -> Nothing
+ _ -> Nothing
+getListType _ = Nothing
+
+listParagraphDivs :: [String]
+listParagraphDivs = ["ListParagraph"]
+
+-- This is a first stab at going through and attaching meaning to list
+-- paragraphs, without an item marker, following a list item. We
+-- assume that these are paragraphs in the same item.
+
+handleListParagraphs :: [Block] -> [Block]
+handleListParagraphs [] = []
+handleListParagraphs (
+ (Div attr1@(_, classes1, _) blks1) :
+ (Div (ident2, classes2, kvs2) blks2) :
+ blks
+ ) | "list-item" `elem` classes1 &&
+ not ("list-item" `elem` classes2) &&
+ (not . null) (listParagraphDivs `intersect` classes2) =
+ -- We don't want to keep this indent.
+ let newDiv2 =
+ (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2)
+ in
+ handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
+handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
+
+separateBlocks' :: Block -> [[Block]] -> [[Block]]
+separateBlocks' blk ([] : []) = [[blk]]
+separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]]
+-- The following is for the invisible bullet lists. This is how
+-- pandoc-generated ooxml does multiparagraph item lists.
+separateBlocks' b acc | liftM trim (getText b) == Just "" =
+ (init acc) ++ [(last acc) ++ [b]]
+separateBlocks' b acc = acc ++ [[b]]
+
+separateBlocks :: [Block] -> [[Block]]
+separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)
+
+flatToBullets' :: Integer -> [Block] -> [Block]
+flatToBullets' _ [] = []
+flatToBullets' num xs@(b : elems)
+ | getLevelN b == num = b : (flatToBullets' num elems)
+ | otherwise =
+ let bNumId = getNumIdN b
+ bLevel = getLevelN b
+ (children, remaining) =
+ span
+ (\b' ->
+ ((getLevelN b') > bLevel ||
+ ((getLevelN b') == bLevel && (getNumIdN b') == bNumId)))
+ xs
+ in
+ case getListType b of
+ Just (Enumerated attr) ->
+ (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) :
+ (flatToBullets' num remaining)
+ _ ->
+ (BulletList (separateBlocks $ flatToBullets' bLevel children)) :
+ (flatToBullets' num remaining)
+
+flatToBullets :: [Block] -> [Block]
+flatToBullets elems = flatToBullets' (-1) elems
+
+singleItemHeaderToHeader :: Block -> Block
+singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
+singleItemHeaderToHeader blk = blk
+
+
+blocksToBullets :: [Block] -> [Block]
+blocksToBullets blks =
+ map singleItemHeaderToHeader $
+ bottomUp removeListDivs $
+ flatToBullets $ (handleListParagraphs blks)
+
+plainParaInlines :: Block -> [Inline]
+plainParaInlines (Plain ils) = ils
+plainParaInlines (Para ils) = ils
+plainParaInlines _ = []
+
+blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block]
+blocksToDefinitions' [] acc [] = reverse acc
+blocksToDefinitions' defAcc acc [] =
+ reverse $ (DefinitionList (reverse defAcc)) : acc
+blocksToDefinitions' defAcc acc
+ ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks)
+ | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 =
+ let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+ pair = case remainingAttr2 == ("", [], []) of
+ True -> (concatMap plainParaInlines blks1, [blks2])
+ False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
+ in
+ blocksToDefinitions' (pair : defAcc) acc blks
+blocksToDefinitions' defAcc acc
+ ((Div (ident2, classes2, kvs2) blks2) : blks)
+ | (not . null) defAcc && "Definition" `elem` classes2 =
+ let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
+ defItems2 = case remainingAttr2 == ("", [], []) of
+ True -> blks2
+ False -> [Div remainingAttr2 blks2]
+ ((defTerm, defItems):defs) = defAcc
+ defAcc' = case null defItems of
+ True -> (defTerm, [defItems2]) : defs
+ False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
+ in
+ blocksToDefinitions' defAcc' acc blks
+blocksToDefinitions' [] acc (b:blks) =
+ blocksToDefinitions' [] (b:acc) blks
+blocksToDefinitions' defAcc acc (b:blks) =
+ blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+
+removeListDivs' :: Block -> [Block]
+removeListDivs' (Div (ident, classes, kvs) blks)
+ | "list-item" `elem` classes =
+ case delete "list-item" classes of
+ [] -> blks
+ classes' -> [Div (ident, classes', kvs) $ blks]
+removeListDivs' (Div (ident, classes, kvs) blks)
+ | not $ null $ listParagraphDivs `intersect` classes =
+ case classes \\ listParagraphDivs of
+ [] -> blks
+ classes' -> [Div (ident, classes', kvs) blks]
+removeListDivs' blk = [blk]
+
+removeListDivs :: [Block] -> [Block]
+removeListDivs = concatMap removeListDivs'
+
+
+
+blocksToDefinitions :: [Block] -> [Block]
+blocksToDefinitions = blocksToDefinitions' [] []
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
new file mode 100644
index 000000000..221a1d10a
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -0,0 +1,1044 @@
+{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
+
+{-
+Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Parse
+ Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of docx archive into Docx haskell type
+-}
+
+module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
+ , Document(..)
+ , Body(..)
+ , BodyPart(..)
+ , TblLook(..)
+ , Extent
+ , ParPart(..)
+ , Run(..)
+ , RunElem(..)
+ , Notes
+ , Numbering
+ , Relationship
+ , Media
+ , RunStyle(..)
+ , VertAlign(..)
+ , ParIndentation(..)
+ , ParagraphStyle(..)
+ , Row(..)
+ , Cell(..)
+ , archiveToDocx
+ , archiveToDocxWithWarnings
+ ) where
+import Codec.Archive.Zip
+import Text.XML.Light
+import Data.Maybe
+import Data.List
+import System.FilePath
+import Data.Bits ((.|.))
+import qualified Data.ByteString.Lazy as B
+import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad.Reader
+import Control.Monad.State
+import Control.Applicative ((<|>))
+import qualified Data.Map as M
+import Control.Monad.Except
+import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive)
+import Text.TeXMath.Readers.OMML (readOMML)
+import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..))
+import Text.TeXMath (Exp)
+import Text.Pandoc.Readers.Docx.Util
+import Data.Char (readLitChar, ord, chr, isDigit)
+
+data ReaderEnv = ReaderEnv { envNotes :: Notes
+ , envComments :: Comments
+ , envNumbering :: Numbering
+ , envRelationships :: [Relationship]
+ , envMedia :: Media
+ , envFont :: Maybe Font
+ , envCharStyles :: CharStyleMap
+ , envParStyles :: ParStyleMap
+ , envLocation :: DocumentLocation
+ }
+ deriving Show
+
+data ReaderState = ReaderState { stateWarnings :: [String] }
+ deriving Show
+
+data DocxError = DocxError | WrongElem
+ deriving Show
+
+type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
+
+runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState)
+runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
+
+maybeToD :: Maybe a -> D a
+maybeToD (Just a) = return a
+maybeToD Nothing = throwError DocxError
+
+eitherToD :: Either a b -> D b
+eitherToD (Right b) = return b
+eitherToD (Left _) = throwError DocxError
+
+concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs = liftM concat (mapM f xs)
+
+
+-- This is similar to `mapMaybe`: it maps a function returning the D
+-- monad over a list, and only keeps the non-erroring return values.
+mapD :: (a -> D b) -> [a] -> D [b]
+mapD f xs =
+ let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return [])
+ in
+ concatMapM handler xs
+
+data Docx = Docx Document
+ deriving Show
+
+data Document = Document NameSpaces Body
+ deriving Show
+
+data Body = Body [BodyPart]
+ deriving Show
+
+type Media = [(FilePath, B.ByteString)]
+
+type CharStyle = (String, RunStyle)
+
+type ParStyle = (String, ParStyleData)
+
+type CharStyleMap = M.Map String RunStyle
+
+type ParStyleMap = M.Map String ParStyleData
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+ deriving Show
+
+data Numb = Numb String String -- right now, only a key to an abstract num
+ deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+ deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+data DocumentLocation = InDocument | InFootnote | InEndnote
+ deriving (Eq,Show)
+
+data Relationship = Relationship DocumentLocation RelId Target
+ deriving Show
+
+data Notes = Notes NameSpaces
+ (Maybe (M.Map String Element))
+ (Maybe (M.Map String Element))
+ deriving Show
+
+data Comments = Comments NameSpaces (M.Map String Element)
+ deriving Show
+
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+ , rightParIndent :: Maybe Integer
+ , hangingParIndent :: Maybe Integer}
+ deriving Show
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+ , indentation :: Maybe ParIndentation
+ , dropCap :: Bool
+ , pHeading :: Maybe (String, Int)
+ , pNumInfo :: Maybe (String, String)
+ , pBlockQuote :: Maybe Bool
+ }
+ deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+ , indentation = Nothing
+ , dropCap = False
+ , pHeading = Nothing
+ , pNumInfo = Nothing
+ , pBlockQuote = Nothing
+ }
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+ | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
+ | Tbl String TblGrid TblLook [Row]
+ | OMathPara [Exp]
+ deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+ deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+data Row = Row [Cell]
+ deriving Show
+
+data Cell = Cell [BodyPart]
+ deriving Show
+
+-- (width, height) in EMUs
+type Extent = Maybe (Double, Double)
+
+data ParPart = PlainRun Run
+ | Insertion ChangeId Author ChangeDate [Run]
+ | Deletion ChangeId Author ChangeDate [Run]
+ | CommentStart CommentId Author CommentDate [BodyPart]
+ | CommentEnd CommentId
+ | BookMark BookMarkId Anchor
+ | InternalHyperLink Anchor [Run]
+ | ExternalHyperLink URL [Run]
+ | Drawing FilePath String String B.ByteString Extent -- title, alt
+ | Chart -- placeholder for now
+ | PlainOMath [Exp]
+ | SmartTag [Run]
+ deriving Show
+
+data Run = Run RunStyle [RunElem]
+ | Footnote [BodyPart]
+ | Endnote [BodyPart]
+ | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
+ | InlineChart -- placeholder
+ deriving Show
+
+data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
+ deriving Show
+
+data VertAlign = BaseLn | SupScrpt | SubScrpt
+ deriving Show
+
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
+ , isSmallCaps :: Maybe Bool
+ , isStrike :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rStyle :: Maybe CharStyle}
+ deriving Show
+
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+ , isBlockQuote :: Maybe Bool
+ , numInfo :: Maybe (String, String)
+ , psStyle :: Maybe ParStyle}
+ deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = Nothing
+ , isItalic = Nothing
+ , isSmallCaps = Nothing
+ , isStrike = Nothing
+ , rVertAlign = Nothing
+ , rUnderline = Nothing
+ , rStyle = Nothing}
+
+type Target = String
+type Anchor = String
+type URL = String
+type BookMarkId = String
+type RelId = String
+type ChangeId = String
+type CommentId = String
+type Author = String
+type ChangeDate = String
+type CommentDate = String
+
+archiveToDocx :: Archive -> Either DocxError Docx
+archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
+
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings archive = do
+ let notes = archiveToNotes archive
+ comments = archiveToComments archive
+ numbering = archiveToNumbering archive
+ rels = archiveToRelationships archive
+ media = filteredFilesFromArchive archive filePathIsMedia
+ (styles, parstyles) = archiveToStyles archive
+ rEnv =
+ ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
+ rState = ReaderState { stateWarnings = [] }
+ (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
+ case eitherDoc of
+ Right doc -> Right (Docx doc, stateWarnings st)
+ Left e -> Left e
+
+
+
+archiveToDocument :: Archive -> D Document
+archiveToDocument zf = do
+ entry <- maybeToD $ findEntryByPath "word/document.xml" zf
+ docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ let namespaces = elemToNameSpaces docElem
+ bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
+ body <- elemToBody namespaces bodyElem
+ return $ Document namespaces body
+
+elemToBody :: NameSpaces -> Element -> D Body
+elemToBody ns element | isElem ns "w" "body" element =
+ mapD (elemToBodyPart ns) (elChildren element) >>=
+ (\bps -> return $ Body bps)
+elemToBody _ _ = throwError WrongElem
+
+archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
+archiveToStyles zf =
+ let stylesElem = findEntryByPath "word/styles.xml" zf >>=
+ (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ in
+ case stylesElem of
+ Nothing -> (M.empty, M.empty)
+ Just styElem ->
+ let namespaces = elemToNameSpaces styElem
+ in
+ ( M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe CharStyle),
+ M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe ParStyle) )
+
+isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
+isBasedOnStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
+ findAttrByName ns "w" "val"
+ , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
+ | isElem ns "w" "style" element
+ , Just styleType <- findAttrByName ns "w" "type" element
+ , styleType == cStyleType parentStyle
+ , Nothing <- findChildByName ns "w" "basedOn" element
+ , Nothing <- parentStyle = True
+ | otherwise = False
+
+class ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+ getStyleId :: a -> String
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
+ Just (styleId, elemToRunStyle ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
+ Just (styleId, elemToParStyleData ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+getStyleChildren ns element parentStyle
+ | isElem ns "w" "styles" element =
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
+ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
+ | otherwise = []
+
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
+buildBasedOnList ns element rootStyle =
+ case (getStyleChildren ns element rootStyle) of
+ [] -> []
+ stys -> stys ++
+ (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+ let fnElem = findEntryByPath "word/footnotes.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ enElem = findEntryByPath "word/endnotes.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ fn_namespaces = case fnElem of
+ Just e -> elemToNameSpaces e
+ Nothing -> []
+ en_namespaces = case enElem of
+ Just e -> elemToNameSpaces e
+ Nothing -> []
+ ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+ fn = fnElem >>= (elemToNotes ns "footnote")
+ en = enElem >>= (elemToNotes ns "endnote")
+ in
+ Notes ns fn en
+
+archiveToComments :: Archive -> Comments
+archiveToComments zf =
+ let cmtsElem = findEntryByPath "word/comments.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ cmts_namespaces = case cmtsElem of
+ Just e -> elemToNameSpaces e
+ Nothing -> []
+ cmts = (elemToComments cmts_namespaces) <$> cmtsElem
+ in
+ case cmts of
+ Just c -> Comments cmts_namespaces c
+ Nothing -> Comments cmts_namespaces M.empty
+
+filePathToRelType :: FilePath -> Maybe DocumentLocation
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
+
+relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
+relElemToRelationship relType element | qName (elName element) == "Relationship" =
+ do
+ relId <- findAttr (QName "Id" Nothing Nothing) element
+ target <- findAttr (QName "Target" Nothing Nothing) element
+ return $ Relationship relType relId target
+relElemToRelationship _ _ = Nothing
+
+filePathToRelationships :: Archive -> FilePath -> [Relationship]
+filePathToRelationships ar fp | Just relType <- filePathToRelType fp
+ , Just entry <- findEntryByPath fp ar
+ , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+ mapMaybe (relElemToRelationship relType) $ elChildren relElems
+filePathToRelationships _ _ = []
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive =
+ concatMap (filePathToRelationships archive) $ filesInArchive archive
+
+filePathIsMedia :: FilePath -> Bool
+filePathIsMedia fp =
+ let (dir, _) = splitFileName fp
+ in
+ (dir == "word/media/")
+
+lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
+ absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
+ lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
+ lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
+ return lvl
+
+
+numElemToNum :: NameSpaces -> Element -> Maybe Numb
+numElemToNum ns element
+ | isElem ns "w" "num" element = do
+ numId <- findAttrByName ns "w" "numId" element
+ absNumId <- findChildByName ns "w" "abstractNumId" element
+ >>= findAttrByName ns "w" "val"
+ return $ Numb numId absNumId
+numElemToNum _ _ = Nothing
+
+absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
+absNumElemToAbsNum ns element
+ | isElem ns "w" "abstractNum" element = do
+ absNumId <- findAttrByName ns "w" "abstractNumId" element
+ let levelElems = findChildrenByName ns "w" "lvl" element
+ levels = mapMaybe (levelElemToLevel ns) levelElems
+ return $ AbstractNumb absNumId levels
+absNumElemToAbsNum _ _ = Nothing
+
+levelElemToLevel :: NameSpaces -> Element -> Maybe Level
+levelElemToLevel ns element
+ | isElem ns "w" "lvl" element = do
+ ilvl <- findAttrByName ns "w" "ilvl" element
+ fmt <- findChildByName ns "w" "numFmt" element
+ >>= findAttrByName ns "w" "val"
+ txt <- findChildByName ns "w" "lvlText" element
+ >>= findAttrByName ns "w" "val"
+ let start = findChildByName ns "w" "start" element
+ >>= findAttrByName ns "w" "val"
+ >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+ return (ilvl, fmt, txt, start)
+levelElemToLevel _ _ = Nothing
+
+archiveToNumbering' :: Archive -> Maybe Numbering
+archiveToNumbering' zf = do
+ case findEntryByPath "word/numbering.xml" zf of
+ Nothing -> Just $ Numbering [] [] []
+ Just entry -> do
+ numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ let namespaces = elemToNameSpaces numberingElem
+ numElems = findChildrenByName namespaces "w" "num" numberingElem
+ absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
+ nums = mapMaybe (numElemToNum namespaces) numElems
+ absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
+ return $ Numbering namespaces nums absNums
+
+archiveToNumbering :: Archive -> Numbering
+archiveToNumbering archive =
+ fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
+elemToNotes ns notetype element
+ | isElem ns "w" (notetype ++ "s") element =
+ let pairs = mapMaybe
+ (\e -> findAttrByName ns "w" "id" e >>=
+ (\a -> Just (a, e)))
+ (findChildrenByName ns "w" notetype element)
+ in
+ Just $ M.fromList $ pairs
+elemToNotes _ _ _ = Nothing
+
+elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments ns element
+ | isElem ns "w" "comments" element =
+ let pairs = mapMaybe
+ (\e -> findAttrByName ns "w" "id" e >>=
+ (\a -> Just (a, e)))
+ (findChildrenByName ns "w" "comment" element)
+ in
+ M.fromList $ pairs
+elemToComments _ _ = M.empty
+
+
+---------------------------------------------
+---------------------------------------------
+
+elemToTblGrid :: NameSpaces -> Element -> D TblGrid
+elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
+ let cols = findChildrenByName ns "w" "gridCol" element
+ in
+ mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+ cols
+elemToTblGrid _ _ = throwError WrongElem
+
+elemToTblLook :: NameSpaces -> Element -> D TblLook
+elemToTblLook ns element | isElem ns "w" "tblLook" element =
+ let firstRow = findAttrByName ns "w" "firstRow" element
+ val = findAttrByName ns "w" "val" element
+ firstRowFmt =
+ case firstRow of
+ Just "1" -> True
+ Just _ -> False
+ Nothing -> case val of
+ Just bitMask -> testBitMask bitMask 0x020
+ Nothing -> False
+ in
+ return $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = throwError WrongElem
+
+elemToRow :: NameSpaces -> Element -> D Row
+elemToRow ns element | isElem ns "w" "tr" element =
+ do
+ let cellElems = findChildrenByName ns "w" "tc" element
+ cells <- mapD (elemToCell ns) cellElems
+ return $ Row cells
+elemToRow _ _ = throwError WrongElem
+
+elemToCell :: NameSpaces -> Element -> D Cell
+elemToCell ns element | isElem ns "w" "tc" element =
+ do
+ cellContents <- mapD (elemToBodyPart ns) (elChildren element)
+ return $ Cell cellContents
+elemToCell _ _ = throwError WrongElem
+
+elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+elemToParIndentation ns element | isElem ns "w" "ind" element =
+ Just $ ParIndentation {
+ leftParIndent =
+ findAttrByName ns "w" "left" element >>=
+ stringToInteger
+ , rightParIndent =
+ findAttrByName ns "w" "right" element >>=
+ stringToInteger
+ , hangingParIndent =
+ findAttrByName ns "w" "hanging" element >>=
+ stringToInteger}
+elemToParIndentation _ _ = Nothing
+
+testBitMask :: String -> Int -> Bool
+testBitMask bitMaskS n =
+ case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+ [] -> False
+ ((n', _) : _) -> ((n' .|. n) /= 0)
+
+stringToInteger :: String -> Maybe Integer
+stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+
+elemToBodyPart :: NameSpaces -> Element -> D BodyPart
+elemToBodyPart ns element
+ | isElem ns "w" "p" element
+ , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
+ do
+ expsLst <- eitherToD $ readOMML $ showElement c
+ return $ OMathPara expsLst
+elemToBodyPart ns element
+ | isElem ns "w" "p" element
+ , Just (numId, lvl) <- getNumInfo ns element = do
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ num <- asks envNumbering
+ let levelInfo = lookupLevel numId lvl num
+ return $ ListItem parstyle numId lvl levelInfo parparts
+elemToBodyPart ns element
+ | isElem ns "w" "p" element = do
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ -- Word uses list enumeration for numbered headings, so we only
+ -- want to infer a list from the styles if it is NOT a heading.
+ case pHeading parstyle of
+ Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+ num <- asks envNumbering
+ let levelInfo = lookupLevel numId lvl num
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ _ -> return $ Paragraph parstyle parparts
+elemToBodyPart ns element
+ | isElem ns "w" "tbl" element = do
+ let caption' = findChildByName ns "w" "tblPr" element
+ >>= findChildByName ns "w" "tblCaption"
+ >>= findAttrByName ns "w" "val"
+ caption = (fromMaybe "" caption')
+ grid' = case findChildByName ns "w" "tblGrid" element of
+ Just g -> elemToTblGrid ns g
+ Nothing -> return []
+ tblLook' = case findChildByName ns "w" "tblPr" element >>=
+ findChildByName ns "w" "tblLook"
+ of
+ Just l -> elemToTblLook ns l
+ Nothing -> return defaultTblLook
+
+ grid <- grid'
+ tblLook <- tblLook'
+ rows <- mapD (elemToRow ns) (elChildren element)
+ return $ Tbl caption grid tblLook rows
+elemToBodyPart _ _ = throwError WrongElem
+
+lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
+lookupRelationship docLocation relid rels =
+ lookup (docLocation, relid) pairs
+ where
+ pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
+
+expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId s = do
+ location <- asks envLocation
+ target <- asks (lookupRelationship location s . envRelationships)
+ case target of
+ Just filepath -> do
+ bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
+ case bytes of
+ Just bs -> return (filepath, bs)
+ Nothing -> throwError DocxError
+ Nothing -> throwError DocxError
+
+getTitleAndAlt :: NameSpaces -> Element -> (String, String)
+getTitleAndAlt ns element =
+ let mbDocPr = findChildByName ns "wp" "inline" element >>=
+ findChildByName ns "wp" "docPr"
+ title = case mbDocPr >>= findAttrByName ns "" "title" of
+ Just title' -> title'
+ Nothing -> ""
+ alt = case mbDocPr >>= findAttrByName ns "" "descr" of
+ Just alt' -> alt'
+ Nothing -> ""
+ in (title, alt)
+
+elemToParPart :: NameSpaces -> Element -> D ParPart
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
+ , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
+ , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
+ = let (title, alt) = getTitleAndAlt ns drawingElem
+ a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
+ >>= findAttrByName ns "r" "embed"
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
+ Nothing -> throwError WrongElem
+-- The below is an attempt to deal with images in deprecated vml format.
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just _ <- findChildByName ns "w" "pict" element =
+ let drawing = findElement (elemName ns "v" "imagedata") element
+ >>= findAttrByName ns "r" "id"
+ in
+ case drawing of
+ -- Todo: check out title and attr for deprecated format.
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+ Nothing -> throwError WrongElem
+-- Chart
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
+ , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
+ , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
+ = return Chart
+elemToParPart ns element
+ | isElem ns "w" "r" element =
+ elemToRun ns element >>= (\r -> return $ PlainRun r)
+elemToParPart ns element
+ | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ Insertion cId cAuthor cDate runs
+elemToParPart ns element
+ | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ Deletion cId cAuthor cDate runs
+elemToParPart ns element
+ | isElem ns "w" "smartTag" element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ SmartTag runs
+elemToParPart ns element
+ | isElem ns "w" "bookmarkStart" element
+ , Just bmId <- findAttrByName ns "w" "id" element
+ , Just bmName <- findAttrByName ns "w" "name" element =
+ return $ BookMark bmId bmName
+elemToParPart ns element
+ | isElem ns "w" "hyperlink" element
+ , Just relId <- findAttrByName ns "r" "id" element = do
+ location <- asks envLocation
+ runs <- mapD (elemToRun ns) (elChildren element)
+ rels <- asks envRelationships
+ case lookupRelationship location relId rels of
+ Just target -> do
+ case findAttrByName ns "w" "anchor" element of
+ Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ Nothing -> return $ ExternalHyperLink target runs
+ Nothing -> return $ ExternalHyperLink "" runs
+elemToParPart ns element
+ | isElem ns "w" "hyperlink" element
+ , Just anchor <- findAttrByName ns "w" "anchor" element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ InternalHyperLink anchor runs
+elemToParPart ns element
+ | isElem ns "w" "commentRangeStart" element
+ , Just cmtId <- findAttrByName ns "w" "id" element = do
+ (Comments _ commentMap) <- asks envComments
+ case M.lookup cmtId commentMap of
+ Just cmtElem -> elemToCommentStart ns cmtElem
+ Nothing -> throwError WrongElem
+elemToParPart ns element
+ | isElem ns "w" "commentRangeEnd" element
+ , Just cmtId <- findAttrByName ns "w" "id" element =
+ return $ CommentEnd cmtId
+elemToParPart ns element
+ | isElem ns "m" "oMath" element =
+ (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
+elemToParPart _ _ = throwError WrongElem
+
+elemToCommentStart :: NameSpaces -> Element -> D ParPart
+elemToCommentStart ns element
+ | isElem ns "w" "comment" element
+ , Just cmtId <- findAttrByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrByName ns "w" "author" element
+ , Just cmtDate <- findAttrByName ns "w" "date" element = do
+ bps <- mapD (elemToBodyPart ns) (elChildren element)
+ return $ CommentStart cmtId cmtAuthor cmtDate bps
+elemToCommentStart _ _ = throwError WrongElem
+
+lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
+
+lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
+
+elemToExtent :: Element -> Extent
+elemToExtent drawingElem =
+ case (getDim "cx", getDim "cy") of
+ (Just w, Just h) -> Just (w, h)
+ _ -> Nothing
+ where
+ wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
+ getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead
+
+
+childElemToRun :: NameSpaces -> Element -> D Run
+childElemToRun ns element
+ | isElem ns "w" "drawing" element
+ , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
+ , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) element
+ = let (title, alt) = getTitleAndAlt ns element
+ a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
+ >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>=
+ (\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element)
+ Nothing -> throwError WrongElem
+childElemToRun ns element
+ | isElem ns "w" "drawing" element
+ , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
+ , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
+ = return InlineChart
+childElemToRun ns element
+ | isElem ns "w" "footnoteReference" element
+ , Just fnId <- findAttrByName ns "w" "id" element = do
+ notes <- asks envNotes
+ case lookupFootnote fnId notes of
+ Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
+ return $ Footnote bps
+ Nothing -> return $ Footnote []
+childElemToRun ns element
+ | isElem ns "w" "endnoteReference" element
+ , Just enId <- findAttrByName ns "w" "id" element = do
+ notes <- asks envNotes
+ case lookupEndnote enId notes of
+ Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
+ return $ Endnote bps
+ Nothing -> return $ Endnote []
+childElemToRun _ _ = throwError WrongElem
+
+elemToRun :: NameSpaces -> Element -> D Run
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
+ do let choices = findChildrenByName ns "mc" "Choice" altCont
+ choiceChildren = map head $ filter (not . null) $ map elChildren choices
+ outputs <- mapD (childElemToRun ns) choiceChildren
+ case outputs of
+ r : _ -> return r
+ [] -> throwError WrongElem
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element =
+ childElemToRun ns drawingElem
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChildByName ns "w" "footnoteReference" element =
+ childElemToRun ns ref
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChildByName ns "w" "endnoteReference" element =
+ childElemToRun ns ref
+elemToRun ns element
+ | isElem ns "w" "r" element = do
+ runElems <- elemToRunElems ns element
+ runStyle <- elemToRunStyleD ns element
+ return $ Run runStyle runElems
+elemToRun _ _ = throwError WrongElem
+
+getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue field style
+ | Just value <- field style = Just value
+ | Just parentStyle <- psStyle style
+ = getParentStyleValue field (snd parentStyle)
+getParentStyleValue _ _ = Nothing
+
+getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
+ Maybe a
+getParStyleField field stylemap styles
+ | x <- mapMaybe (\x -> M.lookup x stylemap) styles
+ , (y:_) <- mapMaybe (getParentStyleValue field) x
+ = Just y
+getParStyleField _ _ _ = Nothing
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
+elemToParagraphStyle ns element sty
+ | Just pPr <- findChildByName ns "w" "pPr" element =
+ let style =
+ mapMaybe
+ (findAttrByName ns "w" "val")
+ (findChildrenByName ns "w" "pStyle" pPr)
+ in ParagraphStyle
+ {pStyle = style
+ , indentation =
+ findChildByName ns "w" "ind" pPr >>=
+ elemToParIndentation ns
+ , dropCap =
+ case
+ findChildByName ns "w" "framePr" pPr >>=
+ findAttrByName ns "w" "dropCap"
+ of
+ Just "none" -> False
+ Just _ -> True
+ Nothing -> False
+ , pHeading = getParStyleField headingLev sty style
+ , pNumInfo = getParStyleField numInfo sty style
+ , pBlockQuote = getParStyleField isBlockQuote sty style
+ }
+elemToParagraphStyle _ _ _ = defaultParagraphStyle
+
+checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
+checkOnOff ns rPr tag
+ | Just t <- findChild tag rPr
+ , Just val <- findAttrByName ns "w" "val" t =
+ Just $ case val of
+ "true" -> True
+ "false" -> False
+ "on" -> True
+ "off" -> False
+ "1" -> True
+ "0" -> False
+ _ -> False
+ | Just _ <- findChild tag rPr = Just True
+checkOnOff _ _ _ = Nothing
+
+elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
+elemToRunStyleD ns element
+ | Just rPr <- findChildByName ns "w" "rPr" element = do
+ charStyles <- asks envCharStyles
+ let parentSty = case
+ findChildByName ns "w" "rStyle" rPr >>=
+ findAttrByName ns "w" "val"
+ of
+ Just styName | Just style <- M.lookup styName charStyles ->
+ Just (styName, style)
+ _ -> Nothing
+ return $ elemToRunStyle ns element parentSty
+elemToRunStyleD _ _ = return defaultRunStyle
+
+elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
+elemToRunStyle ns element parentStyle
+ | Just rPr <- findChildByName ns "w" "rPr" element =
+ RunStyle
+ {
+ isBold = checkOnOff ns rPr (elemName ns "w" "b")
+ , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
+ , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
+ , isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
+ , rVertAlign =
+ findChildByName ns "w" "vertAlign" rPr >>=
+ findAttrByName ns "w" "val" >>=
+ \v -> Just $ case v of
+ "superscript" -> SupScrpt
+ "subscript" -> SubScrpt
+ _ -> BaseLn
+ , rUnderline =
+ findChildByName ns "w" "u" rPr >>=
+ findAttrByName ns "w" "val"
+ , rStyle = parentStyle
+ }
+elemToRunStyle _ _ _ = defaultRunStyle
+
+isNumericNotNull :: String -> Bool
+isNumericNotNull str = (str /= []) && (all isDigit str)
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel ns element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just index <- stripPrefix "Heading" styleId
+ , isNumericNotNull index = Just (styleId, read index)
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just index <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val" >>=
+ stripPrefix "heading "
+ , isNumericNotNull index = Just (styleId, read index)
+getHeaderLevel _ _ = Nothing
+
+blockQuoteStyleIds :: [String]
+blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
+
+blockQuoteStyleNames :: [String]
+blockQuoteStyleNames = ["Quote", "Block Text"]
+
+getBlockQuote :: NameSpaces -> Element -> Maybe Bool
+getBlockQuote ns element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , styleId `elem` blockQuoteStyleIds = Just True
+ | Just styleName <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val"
+ , styleName `elem` blockQuoteStyleNames = Just True
+getBlockQuote _ _ = Nothing
+
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChildByName ns "w" "pPr" element >>=
+ findChildByName ns "w" "numPr"
+ lvl = fromMaybe "0" (numPr >>=
+ findChildByName ns "w" "ilvl" >>=
+ findAttrByName ns "w" "val")
+ numId <- numPr >>=
+ findChildByName ns "w" "numId" >>=
+ findAttrByName ns "w" "val"
+ return (numId, lvl)
+
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
+elemToParStyleData ns element parentStyle =
+ ParStyleData
+ {
+ headingLev = getHeaderLevel ns element
+ , isBlockQuote = getBlockQuote ns element
+ , numInfo = getNumInfo ns element
+ , psStyle = parentStyle
+ }
+
+elemToRunElem :: NameSpaces -> Element -> D RunElem
+elemToRunElem ns element
+ | isElem ns "w" "t" element
+ || isElem ns "w" "delText" element
+ || isElem ns "m" "t" element = do
+ let str = strContent element
+ font <- asks envFont
+ case font of
+ Nothing -> return $ TextRun str
+ Just f -> return . TextRun $
+ map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ | isElem ns "w" "br" element = return LnBrk
+ | isElem ns "w" "tab" element = return Tab
+ | isElem ns "w" "softHyphen" element = return SoftHyphen
+ | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
+ | isElem ns "w" "sym" element = return (getSymChar ns element)
+ | otherwise = throwError WrongElem
+ where
+ lowerFromPrivate (ord -> c)
+ | c >= ord '\xF000' = chr $ c - ord '\xF000'
+ | otherwise = chr c
+
+-- The char attribute is a hex string
+getSymChar :: NameSpaces -> Element -> RunElem
+getSymChar ns element
+ | Just s <- lowerFromPrivate <$> getCodepoint
+ , Just font <- getFont =
+ let [(char, _)] = readLitChar ("\\x" ++ s) in
+ TextRun . maybe "" (:[]) $ getUnicode font char
+ where
+ getCodepoint = findAttrByName ns "w" "char" element
+ getFont = stringToFont =<< findAttrByName ns "w" "font" element
+ lowerFromPrivate ('F':xs) = '0':xs
+ lowerFromPrivate xs = xs
+getSymChar _ _ = TextRun ""
+
+elemToRunElems :: NameSpaces -> Element -> D [RunElem]
+elemToRunElems ns element
+ | isElem ns "w" "r" element
+ || isElem ns "m" "r" element = do
+ let qualName = elemName ns "w"
+ let font = do
+ fontElem <- findElement (qualName "rFonts") element
+ stringToFont =<<
+ (foldr (<|>) Nothing $
+ map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+ local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
+elemToRunElems _ _ = throwError WrongElem
+
+setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
+setFont f s = s{envFont = f}
+
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..00906cf07
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,108 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , alterMap
+ , getMap
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
+insert (Just k) (Just v) m = alterMap (M.insert k v) m
+insert _ _ m = m
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = State StyleMaps a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = execState genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
+ case styleType of
+ Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ _ -> return ()
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM (Maybe StyleType)
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ Just "paragraph" -> return $ Just ParaStyle
+ Just "character" -> return $ Just CharStyle
+ _ -> return Nothing
+
+getAttrType :: Element -> StateM (Maybe String)
+getAttrType el = do
+ name <- elemName' "type"
+ return $ findAttr name el
+
+getAttrStyleId :: Element -> StateM (Maybe String)
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ return $ findAttr name el
+
+getNameVal :: Element -> StateM (Maybe String)
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ return $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..6646e5b7f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,47 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ , findChildByName
+ , findChildrenByName
+ , findAttrByName
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name =
+ QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ let ns' = ns ++ elemToNameSpaces element
+ in qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns'
+
+findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findChild (elemName ns' pref name) el
+
+findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findChildren (elemName ns' pref name) el
+
+findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName ns pref name el =
+ let ns' = ns ++ elemToNameSpaces el
+ in findAttr (elemName ns' pref name) el
+
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
new file mode 100644
index 000000000..2eaa842b6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -0,0 +1,279 @@
+{-# LANGUAGE
+ ViewPatterns
+ , StandaloneDeriving
+ , TupleSections
+ , FlexibleContexts #-}
+
+module Text.Pandoc.Readers.EPUB
+ (readEPUB)
+ where
+
+import Text.XML.Light
+import Text.Pandoc.Definition hiding (Attr)
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Walk (walk, query)
+import Text.Pandoc.Options ( ReaderOptions(..))
+import Text.Pandoc.Extensions (enableExtension, Extension(Ext_raw_html))
+import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
+import Network.URI (unEscapeString)
+import Text.Pandoc.MediaBag (MediaBag, insertMedia)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.MIME (MimeType)
+import qualified Text.Pandoc.Builder as B
+import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry
+ , findEntryByPath, Entry)
+import qualified Data.ByteString.Lazy as BL (ByteString)
+import System.FilePath ( takeFileName, (</>), dropFileName, normalise
+ , dropFileName
+ , splitFileName )
+import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Control.Monad (guard, liftM)
+import Data.List (isPrefixOf, isInfixOf)
+import Data.Maybe (mapMaybe, fromMaybe)
+import qualified Data.Map as M (Map, lookup, fromList, elems)
+import Data.Monoid ((<>))
+import Control.DeepSeq (deepseq, NFData)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+
+type Items = M.Map String (FilePath, MimeType)
+
+readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
+readEPUB opts bytes = case toArchiveOrFail bytes of
+ Right archive -> archiveToEPUB opts $ archive
+ Left _ -> throwError $ PandocParseError "Couldn't extract ePub file"
+
+-- runEPUB :: Except PandocError a -> Either PandocError a
+-- runEPUB = runExcept
+
+-- Note that internal reference are aggresively normalised so that all ids
+-- are of the form "filename#id"
+--
+archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
+archiveToEPUB os archive = do
+ -- root is path to folder with manifest file in
+ (root, content) <- getManifest archive
+ meta <- parseMeta content
+ (cover, items) <- parseManifest content
+ -- No need to collapse here as the image path is from the manifest file
+ let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
+ spine <- parseSpine items content
+ let escapedSpine = map (escapeURI . takeFileName . fst) spine
+ Pandoc _ bs <-
+ foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
+ `liftM` parseSpineElem root b) mempty spine
+ let ast = coverDoc <> (Pandoc meta bs)
+ P.setMediaBag $ fetchImages (M.elems items) root archive ast
+ return ast
+ where
+ os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)}
+ parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
+ parseSpineElem (normalise -> r) (normalise -> path, mime) = do
+ doc <- mimeToReader mime r path
+ let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
+ return $ docSpan <> doc
+ mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
+ mimeToReader "application/xhtml+xml" (unEscapeString -> root)
+ (unEscapeString -> path) = do
+ fname <- findEntryByPathE (root </> path) archive
+ html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname
+ return $ fixInternalReferences path html
+ mimeToReader s _ (unEscapeString -> path)
+ | s `elem` imageMimes = return $ imageToPandoc path
+ | otherwise = return $ mempty
+
+-- paths should be absolute when this function is called
+-- renameImages should do this
+fetchImages :: [(FilePath, MimeType)]
+ -> FilePath -- ^ Root
+ -> Archive
+ -> Pandoc
+ -> MediaBag
+fetchImages mimes root arc (query iq -> links) =
+ foldr (uncurry3 insertMedia) mempty
+ (mapMaybe getEntry links)
+ where
+ getEntry link =
+ let abslink = normalise (root </> link) in
+ (link , lookup link mimes, ) . fromEntry
+ <$> findEntryByPath abslink arc
+
+iq :: Inline -> [FilePath]
+iq (Image _ _ (url, _)) = [url]
+iq _ = []
+
+-- Remove relative paths
+renameImages :: FilePath -> Inline -> Inline
+renameImages root img@(Image attr a (url, b))
+ | "data:" `isPrefixOf` url = img
+ | otherwise = Image attr a (collapseFilePath (root </> url), b)
+renameImages _ x = x
+
+imageToPandoc :: FilePath -> Pandoc
+imageToPandoc s = B.doc . B.para $ B.image s "" mempty
+
+imageMimes :: [MimeType]
+imageMimes = ["image/gif", "image/jpeg", "image/png"]
+
+type CoverImage = FilePath
+
+parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items)
+parseManifest content = do
+ manifest <- findElementE (dfName "manifest") content
+ let items = findChildren (dfName "item") manifest
+ r <- mapM parseItem items
+ let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
+ return (cover, (M.fromList r))
+ where
+ findCover e = maybe False (isInfixOf "cover-image")
+ (findAttr (emptyName "properties") e)
+ parseItem e = do
+ uid <- findAttrE (emptyName "id") e
+ href <- findAttrE (emptyName "href") e
+ mime <- findAttrE (emptyName "media-type") e
+ return (uid, (href, mime))
+
+parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
+parseSpine is e = do
+ spine <- findElementE (dfName "spine") e
+ let itemRefs = findChildren (dfName "itemref") spine
+ mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs
+ where
+ parseItemRef ref = do
+ let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
+ guard linear
+ findAttr (emptyName "idref") ref
+
+parseMeta :: PandocMonad m => Element -> m Meta
+parseMeta content = do
+ meta <- findElementE (dfName "metadata") content
+ let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
+ dcspace _ = False
+ let dcs = filterChildrenName dcspace meta
+ let r = foldr parseMetaItem nullMeta dcs
+ return r
+
+-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
+parseMetaItem :: Element -> Meta -> Meta
+parseMetaItem e@(stripNamespace . elName -> field) meta =
+ addMetaField (renameMeta field) (B.str $ strContent e) meta
+
+renameMeta :: String -> String
+renameMeta "creator" = "author"
+renameMeta s = s
+
+getManifest :: PandocMonad m => Archive -> m (String, Element)
+getManifest archive = do
+ metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
+ docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+ let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
+ as <- liftM ((map attrToPair) . elAttribs)
+ (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
+ manifestFile <- mkE "Root not found" (lookup "full-path" as)
+ let rootdir = dropFileName manifestFile
+ --mime <- lookup "media-type" as
+ manifest <- findEntryByPathE manifestFile archive
+ liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+
+-- Fixup
+
+fixInternalReferences :: FilePath -> Pandoc -> Pandoc
+fixInternalReferences pathToFile =
+ (walk $ renameImages root)
+ . (walk $ fixBlockIRs filename)
+ . (walk $ fixInlineIRs filename)
+ where
+ (root, escapeURI -> filename) = splitFileName pathToFile
+
+fixInlineIRs :: String -> Inline -> Inline
+fixInlineIRs s (Span as v) =
+ Span (fixAttrs s as) v
+fixInlineIRs s (Code as code) =
+ Code (fixAttrs s as) code
+fixInlineIRs s (Link as is ('#':url, tit)) =
+ Link (fixAttrs s as) is (addHash s url, tit)
+fixInlineIRs s (Link as is t) =
+ Link (fixAttrs s as) is t
+fixInlineIRs _ v = v
+
+prependHash :: [String] -> Inline -> Inline
+prependHash ps l@(Link attr is (url, tit))
+ | or [s `isPrefixOf` url | s <- ps] =
+ Link attr is ('#':url, tit)
+ | otherwise = l
+prependHash _ i = i
+
+fixBlockIRs :: String -> Block -> Block
+fixBlockIRs s (Div as b) =
+ Div (fixAttrs s as) b
+fixBlockIRs s (Header i as b) =
+ Header i (fixAttrs s as) b
+fixBlockIRs s (CodeBlock as code) =
+ CodeBlock (fixAttrs s as) code
+fixBlockIRs _ b = b
+
+fixAttrs :: FilePath -> B.Attr -> B.Attr
+fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
+
+addHash :: String -> String -> String
+addHash _ "" = ""
+addHash s ident = takeFileName s ++ "#" ++ ident
+
+removeEPUBAttrs :: [(String, String)] -> [(String, String)]
+removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
+
+isEPUBAttr :: (String, String) -> Bool
+isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
+
+-- Library
+
+-- Strict version of foldM
+foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
+foldM' _ z [] = return z
+foldM' f z (x:xs) = do
+ z' <- f z x
+ z' `deepseq` foldM' f z' xs
+
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 f (a, b, c) = f a b c
+
+-- Utility
+
+stripNamespace :: QName -> String
+stripNamespace (QName v _ _) = v
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
+attrToNSPair _ = Nothing
+
+attrToPair :: Attr -> (String, String)
+attrToPair (Attr (QName name _ _) val) = (name, val)
+
+defaultNameSpace :: Maybe String
+defaultNameSpace = Just "http://www.idpf.org/2007/opf"
+
+dfName :: String -> QName
+dfName s = QName s defaultNameSpace Nothing
+
+emptyName :: String -> QName
+emptyName s = QName s Nothing Nothing
+
+-- Convert Maybe interface to Either
+
+findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE q e = mkE "findAttr" $ findAttr q e
+
+findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
+findEntryByPathE (normalise -> path) a =
+ mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+
+parseXMLDocE :: PandocMonad m => String -> m Element
+parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+
+findElementE :: PandocMonad m => QName -> Element -> m Element
+findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+
+mkE :: PandocMonad m => String -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..f02f1a1d4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,1136 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
+ViewPatterns#-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.HTML
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.HTML ( readHtml
+ , htmlTag
+ , htmlInBalanced
+ , isInlineTag
+ , isBlockTag
+ , isTextTag
+ , isCommentTag
+ ) where
+
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
+import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
+ , escapeURI, safeRead )
+import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
+ Extension (Ext_epub_html_exts,
+ Ext_raw_html, Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Logging
+import Text.Pandoc.Parsing hiding ((<|>))
+import Text.Pandoc.Walk
+import qualified Data.Map as M
+import Data.Maybe ( fromMaybe, isJust)
+import Data.List ( intercalate, isInfixOf, isPrefixOf )
+import Data.Char ( isDigit )
+import Control.Monad ( guard, mzero, void, unless )
+import Control.Arrow ((***))
+import Control.Applicative ( (<|>) )
+import Data.Monoid (First (..))
+import Text.TeXMath (readMathML, writeTeX)
+import Data.Default (Default (..), def)
+import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift)
+import Network.URI (URI, parseURIReference, nonStrictRelativeTo)
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
+import Data.Monoid ((<>))
+import Text.Parsec.Error
+import qualified Data.Set as Set
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import Control.Monad.Except (throwError)
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m Pandoc
+readHtml opts inp = do
+ let tags = stripPrefixes . canonicalizeTags $
+ parseTagsOptions parseOptions{ optTagPosition = True } inp
+ parseDoc = do
+ blocks <- (fixPlains False) . mconcat <$> manyTill block eof
+ meta <- stateMeta . parserState <$> getState
+ bs' <- replaceNotes (B.toList blocks)
+ return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
+ result <- flip runReaderT def $
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
+ "source" tags
+ case result of
+ Right doc -> return doc
+ Left err -> throwError $ PandocParseError $ getError err
+
+replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
+replaceNotes = walkM replaceNotes'
+
+replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline
+replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
+ where
+ getNotes = noteTable <$> getState
+replaceNotes' x = return x
+
+data HTMLState =
+ HTMLState
+ { parserState :: ParserState,
+ noteTable :: [(String, Blocks)],
+ baseHref :: Maybe URI,
+ identifiers :: Set.Set String,
+ headerMap :: M.Map Inlines String
+ }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
+ , inChapter :: Bool -- ^ Set if in chapter section
+ , inPlain :: Bool -- ^ Set if in pPlain
+ }
+
+setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
+setInChapter = local (\s -> s {inChapter = True})
+
+setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
+setInPlain = local (\s -> s {inPlain = True})
+
+type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
+
+type TagParser m = HTMLParser m [Tag String]
+
+pBody :: PandocMonad m => TagParser m Blocks
+pBody = pInTags "body" block
+
+pHead :: PandocMonad m => TagParser m Blocks
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag)
+ where pTitle = pInTags "title" inline >>= setTitle . trimInlines
+ setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
+ pMetaTag = do
+ mt <- pSatisfy (~== TagOpen "meta" [])
+ let name = fromAttrib "name" mt
+ if null name
+ then return mempty
+ else do
+ let content = fromAttrib "content" mt
+ updateState $ \s ->
+ let ps = parserState s in
+ s{ parserState = ps{
+ stateMeta = addMetaField name (B.text content)
+ (stateMeta ps) } }
+ return mempty
+ pBaseTag = do
+ bt <- pSatisfy (~== TagOpen "base" [])
+ updateState $ \st -> st{ baseHref =
+ parseURIReference $ fromAttrib "href" bt }
+ return mempty
+
+block :: PandocMonad m => TagParser m Blocks
+block = do
+ pos <- getPosition
+ res <- choice
+ [ eSection
+ , eSwitch B.para block
+ , mempty <$ eFootnote
+ , mempty <$ eTOC
+ , mempty <$ eTitlePage
+ , pPara
+ , pHeader
+ , pBlockQuote
+ , pCodeBlock
+ , pList
+ , pHrule
+ , pTable
+ , pHead
+ , pBody
+ , pDiv
+ , pPlain
+ , pRawHtmlBlock
+ ]
+ report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ return res
+
+namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
+namespaces = [(mathMLNamespace, pMath True)]
+
+mathMLNamespace :: String
+mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
+
+eSwitch :: (PandocMonad m, Monoid a)
+ => (Inlines -> a)
+ -> TagParser m a
+ -> TagParser m a
+eSwitch constructor parser = try $ do
+ guardEnabled Ext_epub_html_exts
+ pSatisfy (~== TagOpen "switch" [])
+ cases <- getFirst . mconcat <$>
+ manyTill (First <$> (eCase <* skipMany pBlank) )
+ (lookAhead $ try $ pSatisfy (~== TagOpen "default" []))
+ skipMany pBlank
+ fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank)
+ skipMany pBlank
+ pSatisfy (~== TagClose "switch")
+ return $ maybe fallback constructor cases
+
+eCase :: PandocMonad m => TagParser m (Maybe Inlines)
+eCase = do
+ skipMany pBlank
+ TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
+ case (flip lookup namespaces) =<< lookup "required-namespace" attr of
+ Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
+ Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
+
+eFootnote :: PandocMonad m => TagParser m ()
+eFootnote = try $ do
+ let notes = ["footnote", "rearnote"]
+ guardEnabled Ext_epub_html_exts
+ (TagOpen tag attr) <- lookAhead $ pAnyTag
+ guard (maybe False (flip elem notes) (lookup "type" attr))
+ let ident = fromMaybe "" (lookup "id" attr)
+ content <- pInTags tag block
+ addNote ident content
+
+addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
+addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
+
+eNoteref :: PandocMonad m => TagParser m Inlines
+eNoteref = try $ do
+ guardEnabled Ext_epub_html_exts
+ TagOpen tag attr <- lookAhead $ pAnyTag
+ guard (maybe False (== "noteref") (lookup "type" attr))
+ let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
+ guard (not (null ident))
+ pInTags tag block
+ return $ B.rawInline "noteref" ident
+
+-- Strip TOC if there is one, better to generate again
+eTOC :: PandocMonad m => TagParser m ()
+eTOC = try $ do
+ guardEnabled Ext_epub_html_exts
+ (TagOpen tag attr) <- lookAhead $ pAnyTag
+ guard (maybe False (== "toc") (lookup "type" attr))
+ void (pInTags tag block)
+
+pList :: PandocMonad m => TagParser m Blocks
+pList = pBulletList <|> pOrderedList <|> pDefinitionList
+
+pBulletList :: PandocMonad m => TagParser m Blocks
+pBulletList = try $ do
+ pSatisfy (~== TagOpen "ul" [])
+ let nonItem = pSatisfy (\t ->
+ not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
+ not (t ~== TagClose "ul"))
+ -- note: if they have an <ol> or <ul> not in scope of a <li>,
+ -- treat it as a list item, though it's not valid xhtml...
+ skipMany nonItem
+ items <- manyTill (pListItem nonItem) (pCloses "ul")
+ return $ B.bulletList $ map (fixPlains True) items
+
+pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
+pListItem nonItem = do
+ TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
+ let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
+ (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal" = Decimal
+parseListStyleType _ = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _ = DefaultStyle
+
+pOrderedList :: PandocMonad m => TagParser m Blocks
+pOrderedList = try $ do
+ TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
+ let (start, style) = (sta', sty')
+ where sta = fromMaybe "1" $
+ lookup "start" attribs
+ sta' = if all isDigit sta
+ then read sta
+ else 1
+
+ pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+ typeAttr = fromMaybe "" $ lookup "type" attribs
+ classAttr = fromMaybe "" $ lookup "class" attribs
+ styleAttr = fromMaybe "" $ lookup "style" attribs
+ listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+ sty' = foldOrElse DefaultStyle
+ [ parseTypeAttr typeAttr
+ , parseListStyleType classAttr
+ , parseListStyleType listStyle
+ ]
+ let nonItem = pSatisfy (\t ->
+ not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
+ not (t ~== TagClose "ol"))
+ -- note: if they have an <ol> or <ul> not in scope of a <li>,
+ -- treat it as a list item, though it's not valid xhtml...
+ skipMany nonItem
+ items <- manyTill (pListItem nonItem) (pCloses "ol")
+ return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
+
+pDefinitionList :: PandocMonad m => TagParser m Blocks
+pDefinitionList = try $ do
+ pSatisfy (~== TagOpen "dl" [])
+ items <- manyTill pDefListItem (pCloses "dl")
+ return $ B.definitionList items
+
+pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks])
+pDefListItem = try $ do
+ let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
+ not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
+ terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
+ defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
+ skipMany nonItem
+ let term = foldl1 (\x y -> x <> B.linebreak <> y) terms
+ return (term, map (fixPlains True) defs)
+
+fixPlains :: Bool -> Blocks -> Blocks
+fixPlains inList bs = if any isParaish bs'
+ then B.fromList $ map plainToPara bs'
+ else bs
+ where isParaish (Para _) = True
+ isParaish (CodeBlock _ _) = True
+ isParaish (Header _ _ _) = True
+ isParaish (BlockQuote _) = True
+ isParaish (BulletList _) = not inList
+ isParaish (OrderedList _ _) = not inList
+ isParaish (DefinitionList _) = not inList
+ isParaish _ = False
+ plainToPara (Plain xs) = Para xs
+ plainToPara x = x
+ bs' = B.toList bs
+
+pRawTag :: PandocMonad m => TagParser m String
+pRawTag = do
+ tag <- pAnyTag
+ let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
+ if tagOpen ignorable (const True) tag || tagClose ignorable tag
+ then return []
+ else return $ renderTags' [tag]
+
+pDiv :: PandocMonad m => TagParser m Blocks
+pDiv = try $ do
+ guardEnabled Ext_native_divs
+ let isDivLike "div" = True
+ isDivLike "section" = True
+ isDivLike _ = False
+ TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
+ contents <- pInTags tag block
+ let (ident, classes, kvs) = mkAttr attr
+ let classes' = if tag == "section"
+ then "section":classes
+ else classes
+ return $ B.divWith (ident, classes', kvs) contents
+
+pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
+pRawHtmlBlock = do
+ raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
+ exts <- getOption readerExtensions
+ if extensionEnabled Ext_raw_html exts && not (null raw)
+ then return $ B.rawBlock "html" raw
+ else ignore raw
+
+ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
+ignore raw = do
+ pos <- getPosition
+ -- raw can be null for tags like <!DOCTYPE>; see paRawTag
+ -- in this case we don't want a warning:
+ unless (null raw) $
+ report $ SkippedContent raw pos
+ return mempty
+
+pHtmlBlock :: PandocMonad m => String -> TagParser m String
+pHtmlBlock t = try $ do
+ open <- pSatisfy (~== TagOpen t [])
+ contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
+ return $ renderTags' $ [open] ++ contents ++ [TagClose t]
+
+-- Sets chapter context
+eSection :: PandocMonad m => TagParser m Blocks
+eSection = try $ do
+ let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
+ let sectTag = tagOpen (`elem` sectioningContent) matchChapter
+ TagOpen tag _ <- lookAhead $ pSatisfy sectTag
+ setInChapter (pInTags tag block)
+
+headerLevel :: PandocMonad m => String -> TagParser m Int
+headerLevel tagtype = do
+ let level = read (drop 1 tagtype)
+ (try $ do
+ guardEnabled Ext_epub_html_exts
+ asks inChapter >>= guard
+ return (level - 1))
+ <|>
+ return level
+
+eTitlePage :: PandocMonad m => TagParser m ()
+eTitlePage = try $ do
+ let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as)
+ let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section")
+ isTitlePage
+ TagOpen tag _ <- lookAhead $ pSatisfy groupTag
+ () <$ pInTags tag block
+
+pHeader :: PandocMonad m => TagParser m Blocks
+pHeader = try $ do
+ TagOpen tagtype attr <- pSatisfy $
+ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
+ (const True)
+ let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
+ level <- headerLevel tagtype
+ contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
+ let ident = fromMaybe "" $ lookup "id" attr
+ let classes = maybe [] words $ lookup "class" attr
+ let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
+ attr' <- registerHeader (ident, classes, keyvals) contents
+ return $ if bodyTitle
+ then mempty -- skip a representation of the title in the body
+ else B.headerWith attr' level contents
+
+pHrule :: PandocMonad m => TagParser m Blocks
+pHrule = do
+ pSelfClosing (=="hr") (const True)
+ return B.horizontalRule
+
+pTable :: PandocMonad m => TagParser m Blocks
+pTable = try $ do
+ TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
+ skipMany pBlank
+ caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
+ widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
+ let pTh = option [] $ pInTags "tr" (pCell "th")
+ pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = do pOptInTag "tbody" $ many1 pTr
+ head'' <- pOptInTag "thead" pTh
+ head' <- pOptInTag "tbody" $ do
+ if null head''
+ then pTh
+ else return head''
+ rowsLs <- many pTBody
+ rows' <- pOptInTag "tfoot" $ many pTr
+ TagClose _ <- pSatisfy (~== TagClose "table")
+ let rows'' = (concat rowsLs) ++ rows'
+ -- fail on empty table
+ guard $ not $ null head' && null rows''
+ let isSinglePlain x = case B.toList x of
+ [] -> True
+ [Plain _] -> True
+ _ -> False
+ let isSimple = all isSinglePlain $ concat (head':rows'')
+ let cols = length $ if null head' then head rows'' else head'
+ -- add empty cells to short rows
+ let addEmpties r = case cols - length r of
+ n | n > 0 -> r ++ replicate n mempty
+ | otherwise -> r
+ let rows = map addEmpties rows''
+ let aligns = replicate cols AlignDefault
+ let widths = if null widths'
+ then if isSimple
+ then replicate cols 0
+ else replicate cols (1.0 / fromIntegral cols)
+ else widths'
+ return $ B.table caption (zip aligns widths) head' rows
+
+pCol :: PandocMonad m => TagParser m Double
+pCol = try $ do
+ TagOpen _ attribs <- pSatisfy (~== TagOpen "col" [])
+ skipMany pBlank
+ optional $ pSatisfy (~== TagClose "col")
+ skipMany pBlank
+ return $ case lookup "width" attribs of
+ Nothing -> case lookup "style" attribs of
+ Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
+ fromMaybe 0.0 $ safeRead ('0':'.':filter
+ (`notElem` " \t\r\n%'\";") xs)
+ _ -> 0.0
+ Just x | not (null x) && last x == '%' ->
+ fromMaybe 0.0 $ safeRead ('0':'.':init x)
+ _ -> 0.0
+
+pColgroup :: PandocMonad m => TagParser m [Double]
+pColgroup = try $ do
+ pSatisfy (~== TagOpen "colgroup" [])
+ skipMany pBlank
+ manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
+
+noColOrRowSpans :: Tag String -> Bool
+noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
+ where isNullOrOne x = case fromAttrib x t of
+ "" -> True
+ "1" -> True
+ _ -> False
+
+pCell :: PandocMonad m => String -> TagParser m [Blocks]
+pCell celltype = try $ do
+ skipMany pBlank
+ res <- pInTags' celltype noColOrRowSpans block
+ skipMany pBlank
+ return [res]
+
+pBlockQuote :: PandocMonad m => TagParser m Blocks
+pBlockQuote = do
+ contents <- pInTags "blockquote" block
+ return $ B.blockQuote $ fixPlains False contents
+
+pPlain :: PandocMonad m => TagParser m Blocks
+pPlain = do
+ contents <- setInPlain $ trimInlines . mconcat <$> many1 inline
+ if B.isNull contents
+ then return mempty
+ else return $ B.plain contents
+
+pPara :: PandocMonad m => TagParser m Blocks
+pPara = do
+ contents <- trimInlines <$> pInTags "p" inline
+ return $ B.para contents
+
+pCodeBlock :: PandocMonad m => TagParser m Blocks
+pCodeBlock = try $ do
+ TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
+ contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
+ let rawText = concatMap tagToString contents
+ -- drop leading newline if any
+ let result' = case rawText of
+ '\n':xs -> xs
+ _ -> rawText
+ -- drop trailing newline if any
+ let result = case reverse result' of
+ '\n':_ -> init result'
+ _ -> result'
+ return $ B.codeBlockWith (mkAttr attr) result
+
+tagToString :: Tag String -> String
+tagToString (TagText s) = s
+tagToString (TagOpen "br" _) = "\n"
+tagToString _ = ""
+
+inline :: PandocMonad m => TagParser m Inlines
+inline = choice
+ [ eNoteref
+ , eSwitch id inline
+ , pTagText
+ , pQ
+ , pEmph
+ , pStrong
+ , pSuperscript
+ , pSubscript
+ , pStrikeout
+ , pLineBreak
+ , pLink
+ , pImage
+ , pCode
+ , pSpan
+ , pMath False
+ , pRawHtmlInline
+ ]
+
+pLocation :: PandocMonad m => TagParser m ()
+pLocation = do
+ (TagPosition r c) <- pSat isTagPosition
+ setPosition $ newPos "input" r c
+
+pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
+pSat f = do
+ pos <- getPosition
+ token show (const pos) (\x -> if f x then Just x else Nothing)
+
+pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String)
+pSatisfy f = try $ optional pLocation >> pSat f
+
+pAnyTag :: PandocMonad m => TagParser m (Tag String)
+pAnyTag = pSatisfy (const True)
+
+pSelfClosing :: PandocMonad m
+ => (String -> Bool) -> ([Attribute String] -> Bool)
+ -> TagParser m (Tag String)
+pSelfClosing f g = do
+ open <- pSatisfy (tagOpen f g)
+ optional $ pSatisfy (tagClose f)
+ return open
+
+pQ :: PandocMonad m => TagParser m Inlines
+pQ = do
+ context <- asks quoteContext
+ let quoteType = case context of
+ InDoubleQuote -> SingleQuote
+ _ -> DoubleQuote
+ let innerQuoteContext = if quoteType == SingleQuote
+ then InSingleQuote
+ else InDoubleQuote
+ let constructor = case quoteType of
+ SingleQuote -> B.singleQuoted
+ DoubleQuote -> B.doubleQuoted
+ withQuoteContext innerQuoteContext $
+ pInlinesInTags "q" constructor
+
+pEmph :: PandocMonad m => TagParser m Inlines
+pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
+
+pStrong :: PandocMonad m => TagParser m Inlines
+pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
+
+pSuperscript :: PandocMonad m => TagParser m Inlines
+pSuperscript = pInlinesInTags "sup" B.superscript
+
+pSubscript :: PandocMonad m => TagParser m Inlines
+pSubscript = pInlinesInTags "sub" B.subscript
+
+pStrikeout :: PandocMonad m => TagParser m Inlines
+pStrikeout = do
+ pInlinesInTags "s" B.strikeout <|>
+ pInlinesInTags "strike" B.strikeout <|>
+ pInlinesInTags "del" B.strikeout <|>
+ try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
+ contents <- mconcat <$> manyTill inline (pCloses "span")
+ return $ B.strikeout contents)
+
+pLineBreak :: PandocMonad m => TagParser m Inlines
+pLineBreak = do
+ pSelfClosing (=="br") (const True)
+ return B.linebreak
+
+-- Unlike fromAttrib from tagsoup, this distinguishes
+-- between a missing attribute and an attribute with empty content.
+maybeFromAttrib :: String -> Tag String -> Maybe String
+maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
+maybeFromAttrib _ _ = Nothing
+
+pLink :: PandocMonad m => TagParser m Inlines
+pLink = try $ do
+ tag <- pSatisfy $ tagOpenLit "a" (const True)
+ let title = fromAttrib "title" tag
+ -- take id from id attribute if present, otherwise name
+ let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag
+ let cls = words $ fromAttrib "class" tag
+ lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+ -- check for href; if href, then a link, otherwise a span
+ case maybeFromAttrib "href" tag of
+ Nothing ->
+ return $ B.spanWith (uid, cls, []) lab
+ Just url' -> do
+ mbBaseHref <- baseHref <$> getState
+ let url = case (parseURIReference url', mbBaseHref) of
+ (Just rel, Just bs) ->
+ show (rel `nonStrictRelativeTo` bs)
+ _ -> url'
+ return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+
+pImage :: PandocMonad m => TagParser m Inlines
+pImage = do
+ tag <- pSelfClosing (=="img") (isJust . lookup "src")
+ mbBaseHref <- baseHref <$> getState
+ let url' = fromAttrib "src" tag
+ let url = case (parseURIReference url', mbBaseHref) of
+ (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ _ -> url'
+ let title = fromAttrib "title" tag
+ let alt = fromAttrib "alt" tag
+ let uid = fromAttrib "id" tag
+ let cls = words $ fromAttrib "class" tag
+ let getAtt k = case fromAttrib k tag of
+ "" -> []
+ v -> [(k, v)]
+ let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
+ return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+
+pCode :: PandocMonad m => TagParser m Inlines
+pCode = try $ do
+ (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
+ result <- manyTill pAnyTag (pCloses open)
+ return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
+
+pSpan :: PandocMonad m => TagParser m Inlines
+pSpan = try $ do
+ guardEnabled Ext_native_spans
+ TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+ contents <- pInTags "span" inline
+ let isSmallCaps = fontVariant == "small-caps"
+ where styleAttr = fromMaybe "" $ lookup "style" attr
+ fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+ return $ tag contents
+
+pRawHtmlInline :: PandocMonad m => TagParser m Inlines
+pRawHtmlInline = do
+ inplain <- asks inPlain
+ result <- pSatisfy (tagComment (const True))
+ <|> if inplain
+ then pSatisfy (not . isBlockTag)
+ else pSatisfy isInlineTag
+ exts <- getOption readerExtensions
+ let raw = renderTags' [result]
+ if extensionEnabled Ext_raw_html exts
+ then return $ B.rawInline "html" raw
+ else ignore raw
+
+mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath s = writeTeX <$> readMathML s
+
+pMath :: PandocMonad m => Bool -> TagParser m Inlines
+pMath inCase = try $ do
+ open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
+ -- we'll assume math tags are MathML unless specially marked
+ -- otherwise...
+ unless inCase $
+ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
+ contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math"))
+ case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of
+ Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $
+ innerText contents
+ Right [] -> return mempty
+ Right x -> return $ case lookup "display" attr of
+ Just "block" -> B.displayMath x
+ _ -> B.math x
+
+pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines)
+ -> TagParser m Inlines
+pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
+
+pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a
+pInTags tagtype parser = pInTags' tagtype (const True) parser
+
+pInTags' :: (PandocMonad m, Monoid a)
+ => String
+ -> (Tag String -> Bool)
+ -> TagParser m a
+ -> TagParser m a
+pInTags' tagtype tagtest parser = try $ do
+ pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
+ mconcat <$> manyTill parser (pCloses tagtype <|> eof)
+
+-- parses p, preceeded by an optional opening tag
+-- and followed by an optional closing tags
+pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a
+pOptInTag tagtype p = try $ do
+ skipMany pBlank
+ optional $ pSatisfy (~== TagOpen tagtype [])
+ skipMany pBlank
+ x <- p
+ skipMany pBlank
+ optional $ pSatisfy (~== TagClose tagtype)
+ skipMany pBlank
+ return x
+
+pCloses :: PandocMonad m => String -> TagParser m ()
+pCloses tagtype = try $ do
+ t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
+ case t of
+ (TagClose t') | t' == tagtype -> pAnyTag >> return ()
+ (TagOpen t' _) | t' `closes` tagtype -> return ()
+ (TagClose "ul") | tagtype == "li" -> return ()
+ (TagClose "ol") | tagtype == "li" -> return ()
+ (TagClose "dl") | tagtype == "dd" -> return ()
+ (TagClose "table") | tagtype == "td" -> return ()
+ (TagClose "table") | tagtype == "tr" -> return ()
+ _ -> mzero
+
+pTagText :: PandocMonad m => TagParser m Inlines
+pTagText = try $ do
+ (TagText str) <- pSatisfy isTagText
+ st <- getState
+ qu <- ask
+ parsed <- lift $ lift $
+ flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ case parsed of
+ Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'"
+ Right result -> return $ mconcat result
+
+pBlank :: PandocMonad m => TagParser m ()
+pBlank = try $ do
+ (TagText str) <- pSatisfy isTagText
+ guard $ all isSpace str
+
+type InlinesParser m = HTMLParser m String
+
+pTagContents :: PandocMonad m => InlinesParser m Inlines
+pTagContents =
+ B.displayMath <$> mathDisplay
+ <|> B.math <$> mathInline
+ <|> pStr
+ <|> pSpace
+ <|> smartPunctuation pTagContents
+ <|> pSymbol
+ <|> pBad
+
+pStr :: PandocMonad m => InlinesParser m Inlines
+pStr = do
+ result <- many1 $ satisfy $ \c ->
+ not (isSpace c) && not (isSpecial c) && not (isBad c)
+ updateLastStrPos
+ return $ B.str result
+
+isSpecial :: Char -> Bool
+isSpecial '"' = True
+isSpecial '\'' = True
+isSpecial '.' = True
+isSpecial '-' = True
+isSpecial '$' = True
+isSpecial '\8216' = True
+isSpecial '\8217' = True
+isSpecial '\8220' = True
+isSpecial '\8221' = True
+isSpecial _ = False
+
+pSymbol :: PandocMonad m => InlinesParser m Inlines
+pSymbol = satisfy isSpecial >>= return . B.str . (:[])
+
+isBad :: Char -> Bool
+isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
+
+pBad :: PandocMonad m => InlinesParser m Inlines
+pBad = do
+ c <- satisfy isBad
+ let c' = case c of
+ '\128' -> '\8364'
+ '\130' -> '\8218'
+ '\131' -> '\402'
+ '\132' -> '\8222'
+ '\133' -> '\8230'
+ '\134' -> '\8224'
+ '\135' -> '\8225'
+ '\136' -> '\710'
+ '\137' -> '\8240'
+ '\138' -> '\352'
+ '\139' -> '\8249'
+ '\140' -> '\338'
+ '\142' -> '\381'
+ '\145' -> '\8216'
+ '\146' -> '\8217'
+ '\147' -> '\8220'
+ '\148' -> '\8221'
+ '\149' -> '\8226'
+ '\150' -> '\8211'
+ '\151' -> '\8212'
+ '\152' -> '\732'
+ '\153' -> '\8482'
+ '\154' -> '\353'
+ '\155' -> '\8250'
+ '\156' -> '\339'
+ '\158' -> '\382'
+ '\159' -> '\376'
+ _ -> '?'
+ return $ B.str [c']
+
+pSpace :: PandocMonad m => InlinesParser m Inlines
+pSpace = many1 (satisfy isSpace) >>= \xs ->
+ if '\n' `elem` xs
+ then return B.softbreak
+ else return B.space
+
+--
+-- Constants
+--
+
+eitherBlockOrInline :: [String]
+eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
+ "del", "ins",
+ "progress", "map", "area", "noscript", "script",
+ "object", "svg", "video", "source"]
+
+{-
+inlineHtmlTags :: [[Char]]
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+ "br", "cite", "code", "dfn", "em", "font", "i", "img",
+ "input", "kbd", "label", "q", "s", "samp", "select",
+ "small", "span", "strike", "strong", "sub", "sup",
+ "textarea", "tt", "u", "var"]
+-}
+
+blockHtmlTags :: [String]
+blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
+ "blockquote", "body", "button", "canvas",
+ "caption", "center", "col", "colgroup", "dd", "dir", "div",
+ "dl", "dt", "fieldset", "figcaption", "figure",
+ "footer", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "head", "header", "hgroup", "hr", "html",
+ "isindex", "menu", "noframes", "ol", "output", "p", "pre",
+ "section", "table", "tbody", "textarea",
+ "thead", "tfoot", "ul", "dd",
+ "dt", "frameset", "li", "tbody", "td", "tfoot",
+ "th", "thead", "tr", "script", "style"]
+
+-- We want to allow raw docbook in markdown documents, so we
+-- include docbook block tags here too.
+blockDocBookTags :: [String]
+blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+ "orderedlist", "segmentedlist", "simplelist",
+ "variablelist", "caution", "important", "note", "tip",
+ "warning", "address", "literallayout", "programlisting",
+ "programlistingco", "screen", "screenco", "screenshot",
+ "synopsis", "example", "informalexample", "figure",
+ "informalfigure", "table", "informaltable", "para",
+ "simpara", "formalpara", "equation", "informalequation",
+ "figure", "screenshot", "mediaobject", "qandaset",
+ "procedure", "task", "cmdsynopsis", "funcsynopsis",
+ "classsynopsis", "blockquote", "epigraph", "msgset",
+ "sidebar", "title"]
+
+epubTags :: [String]
+epubTags = ["case", "switch", "default"]
+
+blockTags :: [String]
+blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags
+
+isInlineTag :: Tag String -> Bool
+isInlineTag t = tagOpen isInlineTagName (const True) t ||
+ tagClose isInlineTagName t ||
+ tagComment (const True) t
+ where isInlineTagName x = x `notElem` blockTags
+
+isBlockTag :: Tag String -> Bool
+isBlockTag t = tagOpen isBlockTagName (const True) t ||
+ tagClose isBlockTagName t ||
+ tagComment (const True) t
+ where isBlockTagName ('?':_) = True
+ isBlockTagName ('!':_) = True
+ isBlockTagName x = x `elem` blockTags
+ || x `elem` eitherBlockOrInline
+
+isTextTag :: Tag String -> Bool
+isTextTag = tagText (const True)
+
+isCommentTag :: Tag String -> Bool
+isCommentTag = tagComment (const True)
+
+-- taken from HXT and extended
+-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags
+closes :: String -> String -> Bool
+_ `closes` "body" = False
+_ `closes` "html" = False
+"body" `closes` "head" = True
+"a" `closes` "a" = True
+"li" `closes` "li" = True
+"th" `closes` t | t `elem` ["th","td"] = True
+"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"dd" `closes` t | t `elem` ["dt", "dd"] = True
+"dt" `closes` t | t `elem` ["dt","dd"] = True
+"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"optgroup" `closes` "optgroup" = True
+"optgroup" `closes` "option" = True
+"option" `closes` "option" = True
+-- http://www.w3.org/TR/html-markup/p.html
+x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
+ "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
+ "table", "ul"] = True
+"meta" `closes` "meta" = True
+"form" `closes` "form" = True
+"label" `closes` "label" = True
+"map" `closes` "map" = True
+"object" `closes` "object" = True
+_ `closes` t | t `elem` ["option","style","script","textarea","title"] = True
+t `closes` "select" | t /= "option" = True
+"thead" `closes` t | t `elem` ["colgroup"] = True
+"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True
+"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True
+t `closes` t2 |
+ t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
+ t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
+t1 `closes` t2 |
+ t1 `elem` blockTags &&
+ t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
+_ `closes` _ = False
+
+--- parsers for use in markdown, textile readers
+
+-- | Matches a stretch of HTML in balanced tags.
+htmlInBalanced :: (Monad m)
+ => (Tag String -> Bool)
+ -> ParserT String st m String
+htmlInBalanced f = try $ do
+ lookAhead (char '<')
+ inp <- getInput
+ let ts = canonicalizeTags $
+ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True } inp
+ case ts of
+ (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
+ guard $ f t
+ guard $ not $ hasTagWarning (t : take 1 rest)
+ case htmlInBalanced' tn (t:rest) of
+ [] -> mzero
+ xs -> case reverse xs of
+ (TagClose _ : TagPosition er ec : _) -> do
+ let ls = er - sr
+ let cs = ec - sc
+ lscontents <- unlines <$> count ls anyLine
+ cscontents <- count cs anyChar
+ (_,closetag) <- htmlTag (~== TagClose tn)
+ return (lscontents ++ cscontents ++ closetag)
+ _ -> mzero
+ _ -> mzero
+
+htmlInBalanced' :: String
+ -> [Tag String]
+ -> [Tag String]
+htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
+ where go :: Int -> [Tag String] -> Maybe [Tag String]
+ go n (t@(TagOpen tn' _):rest) | tn' == tagname =
+ (t :) <$> go (n + 1) rest
+ go 1 (t@(TagClose tn'):_) | tn' == tagname =
+ return [t]
+ go n (t@(TagClose tn'):rest) | tn' == tagname =
+ (t :) <$> go (n - 1) rest
+ go n (t:ts') = (t :) <$> go n ts'
+ go _ [] = mzero
+
+hasTagWarning :: [Tag String] -> Bool
+hasTagWarning (TagWarning _:_) = True
+hasTagWarning _ = False
+
+-- | Matches a tag meeting a certain condition.
+htmlTag :: Monad m
+ => (Tag String -> Bool)
+ -> ParserT [Char] st m (Tag String, String)
+htmlTag f = try $ do
+ lookAhead (char '<')
+ inp <- getInput
+ let (next : _) = canonicalizeTags $ parseTagsOptions
+ parseOptions{ optTagWarning = False } inp
+ guard $ f next
+ let handleTag tagname = do
+ -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+ -- should NOT be parsed as an HTML tag, see #2277
+ guard $ not ('.' `elem` tagname)
+ -- <https://example.org> should NOT be a tag either.
+ -- tagsoup will parse it as TagOpen "https:" [("example.org","")]
+ guard $ not (null tagname)
+ guard $ last tagname /= ':'
+ rendered <- manyTill anyChar (char '>')
+ return (next, rendered ++ ">")
+ case next of
+ TagComment s
+ | "<!--" `isPrefixOf` inp -> do
+ count (length s + 4) anyChar
+ skipMany (satisfy (/='>'))
+ char '>'
+ return (next, "<!--" ++ s ++ "-->")
+ | otherwise -> fail "bogus comment mode, HTML5 parse error"
+ TagOpen tagname _attr -> handleTag tagname
+ TagClose tagname -> handleTag tagname
+ _ -> mzero
+
+mkAttr :: [(String, String)] -> Attr
+mkAttr attr = (attribsId, attribsClasses, attribsKV)
+ where attribsId = fromMaybe "" $ lookup "id" attr
+ attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes
+ attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+
+-- Strip namespace prefixes
+stripPrefixes :: [Tag String] -> [Tag String]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag String -> Tag String
+stripPrefix (TagOpen s as) =
+ TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as)
+stripPrefix (TagClose s) = TagClose (stripPrefix' s)
+stripPrefix x = x
+
+stripPrefix' :: String -> String
+stripPrefix' s =
+ case span (/= ':') s of
+ (_, "") -> s
+ (_, (_:ts)) -> ts
+
+isSpace :: Char -> Bool
+isSpace ' ' = True
+isSpace '\t' = True
+isSpace '\n' = True
+isSpace '\r' = True
+isSpace _ = False
+
+-- Instances
+
+instance HasIdentifierList HTMLState where
+ extractIdentifierList = identifiers
+ updateIdentifierList f s = s{ identifiers = f (identifiers s) }
+
+instance HasHeaderMap HTMLState where
+ extractHeaderMap = headerMap
+ updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where
+ getQuoteContext = asks quoteContext
+ withQuoteContext q = local (\s -> s{quoteContext = q})
+
+instance HasReaderOptions HTMLState where
+ extractReaderOptions = extractReaderOptions . parserState
+
+instance HasMeta HTMLState where
+ setMeta s b st = st {parserState = setMeta s b $ parserState st}
+ deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+ def = HTMLLocal NoQuote False False
+
+instance HasLastStrPosition HTMLState where
+ setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+ getLastStrPos = getLastStrPos . parserState
+
+
+-- EPUB Specific
+--
+--
+sectioningContent :: [String]
+sectioningContent = ["article", "aside", "nav", "section"]
+
+
+groupingContent :: [String]
+groupingContent = ["p", "hr", "pre", "blockquote", "ol"
+ , "ul", "li", "dl", "dt", "dt", "dd"
+ , "figure", "figcaption", "div", "main"]
+
+
+{-
+
+types :: [(String, ([String], Int))]
+types = -- Document divisions
+ map (\s -> (s, (["section", "body"], 0)))
+ ["volume", "part", "chapter", "division"]
+ ++ -- Document section and components
+ [
+ ("abstract", ([], 0))]
+-}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
new file mode 100644
index 000000000..310a04574
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -0,0 +1,160 @@
+{-# LANGUAGE CPP #-}
+{- |
+ Module : Text.Pandoc.Readers.Haddock
+ Copyright : Copyright (C) 2013 David Lazar
+ License : GNU GPL, version 2 or above
+
+ Maintainer : David Lazar <lazar6@illinois.edu>,
+ John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+
+Conversion of Haddock markup to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Haddock
+ ( readHaddock
+ ) where
+
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Data.Monoid ((<>))
+import Text.Pandoc.Shared (trim, splitBy)
+import Data.List (intersperse, stripPrefix)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Documentation.Haddock.Parser
+import Documentation.Haddock.Types
+import Text.Pandoc.Error
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad)
+
+
+-- | Parse Haddock markup and return a 'Pandoc' document.
+readHaddock :: PandocMonad m
+ => ReaderOptions
+ -> String
+ -> m Pandoc
+readHaddock opts s = case readHaddockEither opts s of
+ Right result -> return result
+ Left e -> throwError e
+
+readHaddockEither :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse
+ -> Either PandocError Pandoc
+readHaddockEither _opts =
+#if MIN_VERSION_haddock_library(1,2,0)
+ Right . B.doc . docHToBlocks . _doc . parseParas
+#else
+ Right . B.doc . docHToBlocks . parseParas
+#endif
+
+docHToBlocks :: DocH String Identifier -> Blocks
+docHToBlocks d' =
+ case d' of
+ DocEmpty -> mempty
+ DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
+ B.headerWith (ident,[],[]) (headerLevel h)
+ (docHToInlines False $ headerTitle h)
+ DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
+ DocString _ -> inlineFallback
+ DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
+ DocParagraph x -> B.para $ docHToInlines False x
+ DocIdentifier _ -> inlineFallback
+ DocIdentifierUnchecked _ -> inlineFallback
+ DocModule s -> B.plain $ docHToInlines False $ DocModule s
+ DocWarning _ -> mempty -- TODO
+ DocEmphasis _ -> inlineFallback
+ DocMonospaced _ -> inlineFallback
+ DocBold _ -> inlineFallback
+#if MIN_VERSION_haddock_library(1,4,0)
+ DocMathInline _ -> inlineFallback
+ DocMathDisplay _ -> inlineFallback
+#endif
+ DocHeader h -> B.header (headerLevel h)
+ (docHToInlines False $ headerTitle h)
+ DocUnorderedList items -> B.bulletList (map docHToBlocks items)
+ DocOrderedList items -> B.orderedList (map docHToBlocks items)
+ DocDefList items -> B.definitionList (map (\(d,t) ->
+ (docHToInlines False d,
+ [consolidatePlains $ docHToBlocks t])) items)
+ DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s
+ DocCodeBlock d -> B.para $ docHToInlines True d
+ DocHyperlink _ -> inlineFallback
+ DocPic _ -> inlineFallback
+ DocAName _ -> inlineFallback
+ DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
+ DocExamples es -> mconcat $ map (\e ->
+ makeExample ">>>" (exampleExpression e) (exampleResult e)) es
+
+ where inlineFallback = B.plain $ docHToInlines False d'
+ consolidatePlains = B.fromList . consolidatePlains' . B.toList
+ consolidatePlains' zs@(Plain _ : _) =
+ let (xs, ys) = span isPlain zs in
+ Para (concatMap extractContents xs) : consolidatePlains' ys
+ consolidatePlains' (x : xs) = x : consolidatePlains' xs
+ consolidatePlains' [] = []
+ isPlain (Plain _) = True
+ isPlain _ = False
+ extractContents (Plain xs) = xs
+ extractContents _ = []
+
+docHToInlines :: Bool -> DocH String Identifier -> Inlines
+docHToInlines isCode d' =
+ case d' of
+ DocEmpty -> mempty
+ DocAppend d1 d2 -> mappend (docHToInlines isCode d1)
+ (docHToInlines isCode d2)
+ DocString s
+ | isCode -> mconcat $ intersperse B.linebreak
+ $ map B.code $ splitBy (=='\n') s
+ | otherwise -> B.text s
+ DocParagraph _ -> mempty
+ DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s
+ DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s
+ DocModule s -> B.codeWith ("",["haskell","module"],[]) s
+ DocWarning _ -> mempty -- TODO
+ DocEmphasis d -> B.emph (docHToInlines isCode d)
+ DocMonospaced (DocString s) -> B.code s
+ DocMonospaced d -> docHToInlines True d
+ DocBold d -> B.strong (docHToInlines isCode d)
+#if MIN_VERSION_haddock_library(1,4,0)
+ DocMathInline s -> B.math s
+ DocMathDisplay s -> B.displayMath s
+#endif
+ DocHeader _ -> mempty
+ DocUnorderedList _ -> mempty
+ DocOrderedList _ -> mempty
+ DocDefList _ -> mempty
+ DocCodeBlock _ -> mempty
+ DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h)
+ (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h)
+ DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p)
+ (maybe mempty B.text $ pictureTitle p)
+ DocAName s -> B.spanWith (s,["anchor"],[]) mempty
+ DocProperty _ -> mempty
+ DocExamples _ -> mempty
+
+-- | Create an 'Example', stripping superfluous characters as appropriate
+makeExample :: String -> String -> [String] -> Blocks
+makeExample prompt expression result =
+ B.para $ B.codeWith ("",["prompt"],[]) prompt
+ <> B.space
+ <> B.codeWith ([], ["haskell","expr"], []) (trim expression)
+ <> B.linebreak
+ <> (mconcat $ intersperse B.linebreak $ map coder result')
+ where
+ -- 1. drop trailing whitespace from the prompt, remember the prefix
+ prefix = takeWhile (`elem` " \t") prompt
+
+ -- 2. drop, if possible, the exact same sequence of whitespace
+ -- characters from each result line
+ --
+ -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
+ -- empty line
+ result' = map (substituteBlankLine . tryStripPrefix prefix) result
+ where
+ tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+
+ substituteBlankLine "<BLANKLINE>" = ""
+ substituteBlankLine line = line
+ coder = B.codeWith ([], ["result"], [])
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..9f9a79535
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,1437 @@
+{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXBlock,
+ inlineCommand,
+ ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
+ mathDisplay, mathInline)
+import Data.Char ( chr, ord, isLetter, isAlphaNum )
+import Control.Monad
+import Text.Pandoc.Builder
+import Control.Applicative ((<|>), many, optional)
+import Data.Maybe (fromMaybe, maybeToList)
+import System.FilePath (replaceExtension, takeExtension, addExtension)
+import Data.List (intercalate)
+import qualified Data.Map as M
+import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.ImageSize (numUnit, showFl)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, report,
+ readFileFromDirs)
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m Pandoc
+readLaTeX opts ltx = do
+ parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+parseLaTeX :: PandocMonad m => LP m Pandoc
+parseLaTeX = do
+ bs <- blocks
+ eof
+ st <- getState
+ let meta = stateMeta st
+ let (Pandoc _ bs') = doc bs
+ return $ Pandoc meta bs'
+
+type LP m = ParserT String ParserState m
+
+anyControlSeq :: PandocMonad m => LP m String
+anyControlSeq = do
+ char '\\'
+ next <- option '\n' anyChar
+ case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
+
+controlSeq :: PandocMonad m => String -> LP m String
+controlSeq name = try $ do
+ char '\\'
+ case name of
+ "" -> mzero
+ [c] | not (isLetter c) -> string [c]
+ cs -> string cs <* notFollowedBy letter <* optional sp
+ return name
+
+dimenarg :: PandocMonad m => LP m String
+dimenarg = try $ do
+ ch <- option "" $ string "="
+ num <- many1 digit
+ dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ return $ ch ++ num ++ dim
+
+sp :: PandocMonad m => LP m ()
+sp = whitespace <|> endline
+
+whitespace :: PandocMonad m => LP m ()
+whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+
+endline :: PandocMonad m => LP m ()
+endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+tildeEscape :: PandocMonad m => LP m Char
+tildeEscape = try $ do
+ string "^^"
+ c <- satisfy (\x -> x >= '\0' && x <= '\128')
+ d <- if isLowerHex c
+ then option "" $ count 1 (satisfy isLowerHex)
+ else return ""
+ if null d
+ then case ord c of
+ x | x >= 64 && x <= 127 -> return $ chr (x - 64)
+ | otherwise -> return $ chr (x + 64)
+ else return $ chr $ read ('0':'x':c:d)
+
+comment :: PandocMonad m => LP m ()
+comment = do
+ char '%'
+ skipMany (satisfy (/='\n'))
+ optional newline
+ return ()
+
+bgroup :: PandocMonad m => LP m ()
+bgroup = try $ do
+ skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
+ () <$ char '{'
+ <|> () <$ controlSeq "bgroup"
+ <|> () <$ controlSeq "begingroup"
+
+egroup :: PandocMonad m => LP m ()
+egroup = () <$ char '}'
+ <|> () <$ controlSeq "egroup"
+ <|> () <$ controlSeq "endgroup"
+
+grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
+grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)
+
+braced :: PandocMonad m => LP m String
+braced = bgroup *> (concat <$> manyTill
+ ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
+ <|> try (string "\\}")
+ <|> try (string "\\{")
+ <|> try (string "\\\\")
+ <|> ((\x -> "{" ++ x ++ "}") <$> braced)
+ <|> count 1 anyChar
+ ) egroup)
+
+bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
+bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
+
+mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
+mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
+
+mathInline :: PandocMonad m => LP m String -> LP m Inlines
+mathInline p = math <$> (try p >>= applyMacros')
+
+mathChars :: PandocMonad m => LP m String
+mathChars =
+ concat <$> many (escapedChar
+ <|> (snd <$> withRaw braced)
+ <|> many1 (satisfy isOrdChar))
+ where escapedChar = try $ do char '\\'
+ c <- anyChar
+ return ['\\',c]
+ isOrdChar '$' = False
+ isOrdChar '{' = False
+ isOrdChar '}' = False
+ isOrdChar '\\' = False
+ isOrdChar _ = True
+
+quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
+quoted' f starter ender = do
+ startchs <- starter
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then do
+ ils <- many (notFollowedBy ender >> inline)
+ (ender >> return (f (mconcat ils))) <|>
+ (<> mconcat ils) <$>
+ lit (case startchs of
+ "``" -> "“"
+ "`" -> "‘"
+ _ -> startchs)
+ else lit startchs
+
+doubleQuote :: PandocMonad m => LP m Inlines
+doubleQuote = do
+ quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+ <|> quoted' doubleQuoted (string "“") (void $ char '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
+ <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+
+singleQuote :: PandocMonad m => LP m Inlines
+singleQuote = do
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+ <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+ else str <$> many1 (oneOf "`\'‘’")
+
+inline :: PandocMonad m => LP m Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ whitespace)
+ <|> (softbreak <$ endline)
+ <|> inlineText
+ <|> inlineCommand
+ <|> inlineEnvironment
+ <|> inlineGroup
+ <|> (char '-' *> option (str "-")
+ (char '-' *> option (str "–") (str "—" <$ char '-')))
+ <|> doubleQuote
+ <|> singleQuote
+ <|> (str "”" <$ try (string "''"))
+ <|> (str "”" <$ char '”')
+ <|> (str "’" <$ char '\'')
+ <|> (str "’" <$ char '’')
+ <|> (str "\160" <$ char '~')
+ <|> mathDisplay (string "$$" *> mathChars <* string "$$")
+ <|> mathInline (char '$' *> mathChars <* char '$')
+ <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
+ <|> (str . (:[]) <$> tildeEscape)
+ <|> (do res <- oneOf "#&~^'`\"[]"
+ pos <- getPosition
+ report $ ParsingUnescaped [res] pos
+ return $ str [res])
+
+inlines :: PandocMonad m => LP m Inlines
+inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+
+inlineGroup :: PandocMonad m => LP m Inlines
+inlineGroup = do
+ ils <- grouped inline
+ if isNull ils
+ then return mempty
+ else return $ spanWith nullAttr ils
+ -- we need the span so we can detitlecase bibtex entries;
+ -- we need to know when something is {C}apitalized
+
+block :: PandocMonad m => LP m Blocks
+block = (mempty <$ comment)
+ <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
+ <|> environment
+ <|> include
+ <|> macro
+ <|> blockCommand
+ <|> paragraph
+ <|> grouped block
+ <|> (mempty <$ char '&') -- loose & in table environment
+
+
+blocks :: PandocMonad m => LP m Blocks
+blocks = mconcat <$> many block
+
+getRawCommand :: PandocMonad m => String -> LP m String
+getRawCommand name' = do
+ rawargs <- withRaw (many (try (optional sp *> opt)) *>
+ option "" (try (optional sp *> dimenarg)) *>
+ many braced)
+ return $ '\\' : name' ++ snd rawargs
+
+lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where
+ lookupList l m = msum $ map (`M.lookup` m) l
+
+blockCommand :: PandocMonad m => LP m Blocks
+blockCommand = try $ do
+ name <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" (string "*" <* optional sp)
+ let name' = name ++ star
+ let raw = do
+ rawcommand <- getRawCommand name'
+ transformed <- applyMacros' rawcommand
+ guard $ transformed /= rawcommand
+ notFollowedBy $ parseFromString inlines transformed
+ parseFromString blocks transformed
+ lookupListDefault raw [name',name] blockCommands
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+-- eat an optional argument and one or more arguments in braces
+ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
+ignoreInlines name = (name, p)
+ where
+ p = do oa <- optargs
+ let rawCommand = '\\':name ++ oa
+ let doraw = guardRaw >> return (rawInline "latex" rawCommand)
+ doraw <|> ignore rawCommand
+
+guardRaw :: PandocMonad m => LP m ()
+guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex
+
+optargs :: PandocMonad m => LP m String
+optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))
+
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
+ignoreBlocks name = (name, p)
+ where
+ p = do oa <- optargs
+ let rawCommand = '\\':name ++ oa
+ let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
+ doraw <|> ignore rawCommand
+
+blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
+blockCommands = M.fromList $
+ [ ("par", mempty <$ skipopts)
+ , ("parbox", braced >> grouped blocks)
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
+ -- sectioning
+ , ("chapter", updateState (\s -> s{ stateHasChapters = True })
+ *> section nullAttr 0)
+ , ("chapter*", updateState (\s -> s{ stateHasChapters = True })
+ *> section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
+ -- beamer slides
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ --
+ , ("hrule", pure horizontalRule)
+ , ("strut", pure mempty)
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("item", skipopts *> looseItem)
+ , ("documentclass", skipopts *> braced *> preamble)
+ , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+ , ("caption", skipopts *> setCaption)
+ , ("bibliography", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs))
+ , ("addbibresource", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs))
+ -- includes
+ , ("lstinputlisting", inputListing)
+ ] ++ map ignoreBlocks
+ -- these commands will be ignored unless --parse-raw is specified,
+ -- in which case they will appear as raw latex blocks
+ [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
+ -- newcommand, etc. should be parsed by macro, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "markboth", "markright", "markleft"
+ , "newpage"
+ ]
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ stateMeta = addMetaField field val $ stateMeta st }
+
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+
+setCaption :: PandocMonad m => LP m Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces' >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ stateCaption = Just ils' }
+ return mempty
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
+
+authors :: PandocMonad m => LP m ()
+authors = try $ do
+ char '{'
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >>
+ (inline <|> mempty <$ blockCommand))
+ -- skip e.g. \vspace{10pt}
+ auths <- sepBy oneAuthor (controlSeq "and")
+ char '}'
+ addMeta "author" (map trimInlines auths)
+
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
+ hasChapters <- stateHasChapters `fmap` getState
+ let lvl' = if hasChapters then lvl + 1 else lvl
+ skipopts
+ contents <- grouped inline
+ lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
+ attr' <- registerHeader (lab, classes, kvs) contents
+ return $ headerWith attr' lvl' contents
+
+inlineCommand :: PandocMonad m => LP m Inlines
+inlineCommand = try $ do
+ name <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ guard $ not $ isBlockCommand name
+ exts <- getOption readerExtensions
+ star <- option "" (string "*")
+ let name' = name ++ star
+ let raw = do
+ rawargs <- withRaw
+ (skipangles *> skipopts *> option "" dimenarg *> many braced)
+ let rawcommand = '\\' : name ++ star ++ snd rawargs
+ transformed <- applyMacros' rawcommand
+ if transformed /= rawcommand
+ then parseFromString inlines transformed
+ else if extensionEnabled Ext_raw_tex exts
+ then return $ rawInline "latex" rawcommand
+ else ignore rawcommand
+ (lookupListDefault mzero [name',name] inlineCommands <*
+ optional (try (string "{}")))
+ <|> raw
+
+unlessParseRaw :: PandocMonad m => LP m ()
+unlessParseRaw = getOption readerExtensions >>=
+ guard . not . extensionEnabled Ext_raw_tex
+
+isBlockCommand :: String -> Bool
+isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
+
+
+inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
+inlineEnvironments = M.fromList
+ [ ("displaymath", mathEnv id Nothing "displaymath")
+ , ("math", math <$> verbEnv "math")
+ , ("equation", mathEnv id Nothing "equation")
+ , ("equation*", mathEnv id Nothing "equation*")
+ , ("gather", mathEnv id (Just "gathered") "gather")
+ , ("gather*", mathEnv id (Just "gathered") "gather*")
+ , ("multline", mathEnv id (Just "gathered") "multline")
+ , ("multline*", mathEnv id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
+ , ("align", mathEnv id (Just "aligned") "align")
+ , ("align*", mathEnv id (Just "aligned") "align*")
+ , ("alignat", mathEnv id (Just "aligned") "alignat")
+ , ("alignat*", mathEnv id (Just "aligned") "alignat*")
+ ]
+
+inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
+inlineCommands = M.fromList $
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("ldots", lit "…")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", unlessParseRaw >> (inBrackets <$> tok))
+ , ("ref", unlessParseRaw >> (inBrackets <$> tok))
+ , ("noindent", unlessParseRaw >> ignore "noindent")
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
+ , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
+ , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
+ , ("ensuremath", mathInline braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", try $ tok >>= accent hungarumlaut)
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent circ)
+ , ("~", option (str "~") $ try $ tok >>= accent tilde)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , (".", option (str ".") $ try $ tok >>= accent dot)
+ , ("=", option (str "=") $ try $ tok >>= accent macron)
+ , ("c", option (str "c") $ try $ tok >>= accent cedilla)
+ , ("v", option (str "v") $ try $ tok >>= accent hacek)
+ , ("u", option (str "u") $ try $ tok >>= accent breve)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
+ , (",", pure mempty)
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("verb", doverb)
+ , ("lstinline", skipopts *> doverb)
+ , ("Verb", doverb)
+ , ("texttt", (code . stringify . toList) <$> tok)
+ , ("url", (unescapeURL <$> braced) >>= \url ->
+ pure (link url "" (str url)))
+ , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
+ tok >>= \lab ->
+ pure (link url "" lab))
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL . removeDoubleQuotes <$> braced
+ mkImage options src)
+ , ("enquote", enquote)
+ , ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ ] ++ map ignoreInlines
+ -- these commands will be ignored unless --parse-raw is specified,
+ -- in which case they will appear as raw latex blocks:
+ [ "index" ]
+
+mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
+mkImage options src = do
+ let replaceTextwidth (k,v) = case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let attr = ("",[], kvs)
+ let alt = str "image"
+ case takeExtension src of
+ "" -> do
+ defaultExt <- getOption readerDefaultImageExtension
+ return $ imageWith attr (addExtension src defaultExt) "" alt
+ _ -> return $ imageWith attr src "" alt
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+enquote :: PandocMonad m => LP m Inlines
+enquote = do
+ skipopts
+ context <- stateQuoteContext <$> getState
+ if context == InDoubleQuote
+ then singleQuoted <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted <$> withQuoteContext InDoubleQuote tok
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+ marker <- anyChar
+ code <$> manyTill (satisfy (/='\n')) (char marker)
+
+doLHSverb :: PandocMonad m => LP m Inlines
+doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+
+lit :: String -> LP m Inlines
+lit = pure . str
+
+accent :: (Char -> String) -> Inlines -> LP m Inlines
+accent f ils =
+ case toList ils of
+ (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
+ [] -> mzero
+ _ -> return ils
+
+grave :: Char -> String
+grave 'A' = "À"
+grave 'E' = "È"
+grave 'I' = "Ì"
+grave 'O' = "Ò"
+grave 'U' = "Ù"
+grave 'a' = "à"
+grave 'e' = "è"
+grave 'i' = "ì"
+grave 'o' = "ò"
+grave 'u' = "ù"
+grave c = [c]
+
+acute :: Char -> String
+acute 'A' = "Á"
+acute 'E' = "É"
+acute 'I' = "Í"
+acute 'O' = "Ó"
+acute 'U' = "Ú"
+acute 'Y' = "Ý"
+acute 'a' = "á"
+acute 'e' = "é"
+acute 'i' = "í"
+acute 'o' = "ó"
+acute 'u' = "ú"
+acute 'y' = "ý"
+acute 'C' = "Ć"
+acute 'c' = "ć"
+acute 'L' = "Ĺ"
+acute 'l' = "ĺ"
+acute 'N' = "Ń"
+acute 'n' = "ń"
+acute 'R' = "Ŕ"
+acute 'r' = "ŕ"
+acute 'S' = "Ś"
+acute 's' = "ś"
+acute 'Z' = "Ź"
+acute 'z' = "ź"
+acute c = [c]
+
+circ :: Char -> String
+circ 'A' = "Â"
+circ 'E' = "Ê"
+circ 'I' = "Î"
+circ 'O' = "Ô"
+circ 'U' = "Û"
+circ 'a' = "â"
+circ 'e' = "ê"
+circ 'i' = "î"
+circ 'o' = "ô"
+circ 'u' = "û"
+circ 'C' = "Ĉ"
+circ 'c' = "ĉ"
+circ 'G' = "Ĝ"
+circ 'g' = "ĝ"
+circ 'H' = "Ĥ"
+circ 'h' = "ĥ"
+circ 'J' = "Ĵ"
+circ 'j' = "ĵ"
+circ 'S' = "Ŝ"
+circ 's' = "ŝ"
+circ 'W' = "Ŵ"
+circ 'w' = "ŵ"
+circ 'Y' = "Ŷ"
+circ 'y' = "ŷ"
+circ c = [c]
+
+tilde :: Char -> String
+tilde 'A' = "Ã"
+tilde 'a' = "ã"
+tilde 'O' = "Õ"
+tilde 'o' = "õ"
+tilde 'I' = "Ĩ"
+tilde 'i' = "ĩ"
+tilde 'U' = "Ũ"
+tilde 'u' = "ũ"
+tilde 'N' = "Ñ"
+tilde 'n' = "ñ"
+tilde c = [c]
+
+umlaut :: Char -> String
+umlaut 'A' = "Ä"
+umlaut 'E' = "Ë"
+umlaut 'I' = "Ï"
+umlaut 'O' = "Ö"
+umlaut 'U' = "Ü"
+umlaut 'a' = "ä"
+umlaut 'e' = "ë"
+umlaut 'i' = "ï"
+umlaut 'o' = "ö"
+umlaut 'u' = "ü"
+umlaut c = [c]
+
+hungarumlaut :: Char -> String
+hungarumlaut 'A' = "A̋"
+hungarumlaut 'E' = "E̋"
+hungarumlaut 'I' = "I̋"
+hungarumlaut 'O' = "Ő"
+hungarumlaut 'U' = "Ű"
+hungarumlaut 'Y' = "ӳ"
+hungarumlaut 'a' = "a̋"
+hungarumlaut 'e' = "e̋"
+hungarumlaut 'i' = "i̋"
+hungarumlaut 'o' = "ő"
+hungarumlaut 'u' = "ű"
+hungarumlaut 'y' = "ӳ"
+hungarumlaut c = [c]
+
+dot :: Char -> String
+dot 'C' = "Ċ"
+dot 'c' = "ċ"
+dot 'E' = "Ė"
+dot 'e' = "ė"
+dot 'G' = "Ġ"
+dot 'g' = "ġ"
+dot 'I' = "İ"
+dot 'Z' = "Ż"
+dot 'z' = "ż"
+dot c = [c]
+
+macron :: Char -> String
+macron 'A' = "Ā"
+macron 'E' = "Ē"
+macron 'I' = "Ī"
+macron 'O' = "Ō"
+macron 'U' = "Ū"
+macron 'a' = "ā"
+macron 'e' = "ē"
+macron 'i' = "ī"
+macron 'o' = "ō"
+macron 'u' = "ū"
+macron c = [c]
+
+cedilla :: Char -> String
+cedilla 'c' = "ç"
+cedilla 'C' = "Ç"
+cedilla 's' = "ş"
+cedilla 'S' = "Ş"
+cedilla 't' = "ţ"
+cedilla 'T' = "Ţ"
+cedilla 'e' = "ȩ"
+cedilla 'E' = "Ȩ"
+cedilla 'h' = "ḩ"
+cedilla 'H' = "Ḩ"
+cedilla 'o' = "o̧"
+cedilla 'O' = "O̧"
+cedilla c = [c]
+
+hacek :: Char -> String
+hacek 'A' = "Ǎ"
+hacek 'a' = "ǎ"
+hacek 'C' = "Č"
+hacek 'c' = "č"
+hacek 'D' = "Ď"
+hacek 'd' = "ď"
+hacek 'E' = "Ě"
+hacek 'e' = "ě"
+hacek 'G' = "Ǧ"
+hacek 'g' = "ǧ"
+hacek 'H' = "Ȟ"
+hacek 'h' = "ȟ"
+hacek 'I' = "Ǐ"
+hacek 'i' = "ǐ"
+hacek 'j' = "ǰ"
+hacek 'K' = "Ǩ"
+hacek 'k' = "ǩ"
+hacek 'L' = "Ľ"
+hacek 'l' = "ľ"
+hacek 'N' = "Ň"
+hacek 'n' = "ň"
+hacek 'O' = "Ǒ"
+hacek 'o' = "ǒ"
+hacek 'R' = "Ř"
+hacek 'r' = "ř"
+hacek 'S' = "Š"
+hacek 's' = "š"
+hacek 'T' = "Ť"
+hacek 't' = "ť"
+hacek 'U' = "Ǔ"
+hacek 'u' = "ǔ"
+hacek 'Z' = "Ž"
+hacek 'z' = "ž"
+hacek c = [c]
+
+breve :: Char -> String
+breve 'A' = "Ă"
+breve 'a' = "ă"
+breve 'E' = "Ĕ"
+breve 'e' = "ĕ"
+breve 'G' = "Ğ"
+breve 'g' = "ğ"
+breve 'I' = "Ĭ"
+breve 'i' = "ĭ"
+breve 'O' = "Ŏ"
+breve 'o' = "ŏ"
+breve 'U' = "Ŭ"
+breve 'u' = "ŭ"
+breve c = [c]
+
+tok :: PandocMonad m => LP m Inlines
+tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
+
+opt :: PandocMonad m => LP m Inlines
+opt = bracketed inline
+
+rawopt :: PandocMonad m => LP m String
+rawopt = do
+ contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
+ try (string "\\[") <|> rawopt)
+ optional sp
+ return $ "[" ++ contents ++ "]"
+
+skipopts :: PandocMonad m => LP m ()
+skipopts = skipMany rawopt
+
+-- opts in angle brackets are used in beamer
+rawangle :: PandocMonad m => LP m ()
+rawangle = try $ do
+ char '<'
+ skipMany (noneOf ">")
+ char '>'
+ return ()
+
+skipangles :: PandocMonad m => LP m ()
+skipangles = skipMany rawangle
+
+inlineText :: PandocMonad m => LP m Inlines
+inlineText = str <$> many1 inlineChar
+
+inlineChar :: PandocMonad m => LP m Char
+inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
+
+environment :: PandocMonad m => LP m Blocks
+environment = do
+ controlSeq "begin"
+ name <- braced
+ M.findWithDefault mzero name environments
+ <|> rawEnv name
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- braced
+ M.findWithDefault mzero name inlineEnvironments
+
+rawEnv :: PandocMonad m => String -> LP m Blocks
+rawEnv name = do
+ exts <- getOption readerExtensions
+ let parseRaw = extensionEnabled Ext_raw_tex exts
+ rawOptions <- mconcat <$> many rawopt
+ let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
+ unless parseRaw $ do
+ pos1 <- getPosition
+ report $ SkippedContent beginCommand pos1
+ (bs, raw) <- withRaw $ env name blocks
+ raw' <- applyMacros' raw
+ if parseRaw
+ then return $ rawBlock "latex" $ beginCommand ++ raw'
+ else do
+ pos2 <- getPosition
+ report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
+ return bs
+
+----
+
+braced' :: PandocMonad m => LP m String
+braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
+
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
+include :: PandocMonad m => LP m Blocks
+include = do
+ fs' <- try $ do
+ char '\\'
+ name <- try (string "include")
+ <|> try (string "input")
+ <|> string "usepackage"
+ -- skip options
+ skipMany $ try $ char '[' *> manyTill anyChar (char ']')
+ fs <- (map trim . splitBy (==',')) <$> braced'
+ return $ if name == "usepackage"
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mconcat <$> mapM (insertIncludedFile blocks dirs) fs'
+
+inputListing :: PandocMonad m => LP m Blocks
+inputListing = do
+ pos <- getPosition
+ options <- option [] keyvals
+ f <- filter (/='"') <$> braced
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs f
+ codeLines <- case mbCode of
+ Just s -> return $ lines s
+ Nothing -> do
+ report $ CouldNotLoadIncludeFile f pos
+ return []
+ let (ident,classes,kvs) = parseListingsOptions options
+ let language = case lookup "language" options >>= fromListingsLanguage of
+ Just l -> [l]
+ Nothing -> take 1 $ languagesByExtension (takeExtension f)
+ let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
+ let lastline = fromMaybe (length codeLines) $
+ lookup "lastline" options >>= safeRead
+ let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
+ drop (firstline - 1) codeLines
+ return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
+
+parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions options =
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ ++ maybeToList (lookup "language" options
+ >>= fromListingsLanguage)
+ in (fromMaybe "" (lookup "label" options), classes, kvs)
+
+----
+
+keyval :: PandocMonad m => LP m (String, String)
+keyval = try $ do
+ key <- many1 alphaNum
+ val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
+ skipMany spaceChar
+ optional (char ',')
+ skipMany spaceChar
+ return (key, val)
+
+
+keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals = try $ char '[' *> manyTill keyval (char ']')
+
+alltt :: PandocMonad m => String -> LP m Blocks
+alltt t = walk strToCode <$> parseFromString blocks
+ (substitute " " "\\ " $ substitute "%" "\\%" $
+ intercalate "\\\\\n" $ lines t)
+ where strToCode (Str s) = Code nullAttr s
+ strToCode x = x
+
+rawLaTeXBlock :: PandocMonad m => LP m String
+rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
+
+rawLaTeXInline :: PandocMonad m => LP m Inline
+rawLaTeXInline = do
+ raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
+ RawInline "latex" <$> applyMacros' raw
+
+addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
+addImageCaption = walkM go
+ where go (Image attr alt (src,tit)) = do
+ mbcapt <- stateCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Image attr (toList ils) (src, "fig:")
+ Nothing -> Image attr alt (src,tit)
+ go x = return x
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table c als ws hs rs) = do
+ mbcapt <- stateCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
+ go x = return x
+
+environments :: PandocMonad m => M.Map String (LP m Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks <* skipMany anyChar)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
+ , ("minipage", env "minipage" $
+ skipopts *> spaces' *> optional braced *> spaces' *> blocks)
+ , ("figure", env "figure" $
+ resetCaption *> skipopts *> blocks >>= addImageCaption)
+ , ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> simpTable False >>= addTableCaption)
+ , ("table", env "table" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular" $ simpTable True)
+ , ("tabular", env "tabular" $ simpTable False)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", orderedList')
+ , ("alltt", alltt =<< verbEnv "alltt")
+ , ("code", guardEnabled Ext_literate_haskell *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
+ , ("Verbatim", fancyverbEnv "Verbatim")
+ , ("BVerbatim", fancyverbEnv "BVerbatim")
+ , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
+ codeBlockWith attr <$> verbEnv "lstlisting")
+ , ("minted", do options <- option [] keyvals
+ lang <- grouped (many1 $ satisfy (/='}'))
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ lang | not (null lang) ] ++
+ [ "numberLines" |
+ lookup "linenos" options == Just "true" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv "minted")
+ , ("obeylines", parseFromString
+ (para . trimInlines . mconcat <$> many inline) =<<
+ intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
+ , ("displaymath", mathEnv para Nothing "displaymath")
+ , ("equation", mathEnv para Nothing "equation")
+ , ("equation*", mathEnv para Nothing "equation*")
+ , ("gather", mathEnv para (Just "gathered") "gather")
+ , ("gather*", mathEnv para (Just "gathered") "gather*")
+ , ("multline", mathEnv para (Just "gathered") "multline")
+ , ("multline*", mathEnv para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
+ , ("align", mathEnv para (Just "aligned") "align")
+ , ("align*", mathEnv para (Just "aligned") "align*")
+ , ("alignat", mathEnv para (Just "aligned") "alignat")
+ , ("alignat*", mathEnv para (Just "aligned") "alignat*")
+ ]
+
+letterContents :: PandocMonad m => LP m Blocks
+letterContents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case lookupMeta "address" (stateMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
+ return $ addr <> bs -- sig added by \closing
+
+closing :: PandocMonad m => LP m Blocks
+closing = do
+ contents <- tok
+ st <- getState
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (stateMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
+ return $ para (trimInlines contents) <> sigs
+
+item :: PandocMonad m => LP m Blocks
+item = blocks *> controlSeq "item" *> skipopts *> blocks
+
+looseItem :: PandocMonad m => LP m Blocks
+looseItem = do
+ ctx <- stateParserContext `fmap` getState
+ if ctx == ListItemState
+ then mzero
+ else return mempty
+
+descItem :: PandocMonad m => LP m (Inlines, [Blocks])
+descItem = do
+ blocks -- skip blocks before item
+ controlSeq "item"
+ optional sp
+ ils <- opt
+ bs <- blocks
+ return (ils, [bs])
+
+env :: PandocMonad m => String -> LP m a -> LP m a
+env name p = p <*
+ (try (controlSeq "end" *> braced >>= guard . (== name))
+ <?> ("\\end{" ++ name ++ "}"))
+
+listenv :: PandocMonad m => String -> LP m a -> LP m a
+listenv name p = try $ do
+ oldCtx <- stateParserContext `fmap` getState
+ updateState $ \st -> st{ stateParserContext = ListItemState }
+ res <- env name p
+ updateState $ \st -> st{ stateParserContext = oldCtx }
+ return res
+
+mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a
+mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
+ "\\end{" ++ y ++ "}"
+
+verbEnv :: PandocMonad m => String -> LP m String
+verbEnv name = do
+ skipopts
+ optional blankline
+ let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
+ res <- manyTill anyChar endEnv
+ return $ stripTrailingNewlines res
+
+fancyverbEnv :: PandocMonad m => String -> LP m Blocks
+fancyverbEnv name = do
+ options <- option [] keyvals
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv name
+
+orderedList' :: PandocMonad m => LP m Blocks
+orderedList' = do
+ optional sp
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ char '[' *> anyOrderedListMarker <* char ']'
+ spaces
+ optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
+ spaces
+ start <- option 1 $ try $ do controlSeq "setcounter"
+ grouped (string "enum" *> many1 (oneOf "iv"))
+ optional sp
+ num <- grouped (many1 digit)
+ spaces
+ return (read num + 1 :: Int)
+ bs <- listenv "enumerate" (many item)
+ return $ orderedListWith (start, style, delim) bs
+
+paragraph :: PandocMonad m => LP m Blocks
+paragraph = do
+ x <- trimInlines . mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para x
+
+preamble :: PandocMonad m => LP m Blocks
+preamble = mempty <$> manyTill preambleBlock beginDoc
+ where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
+ preambleBlock = void comment
+ <|> void sp
+ <|> void blanklines
+ <|> void include
+ <|> void macro
+ <|> void blockCommand
+ <|> void anyControlSeq
+ <|> void braced
+ <|> void anyChar
+
+-------
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: PandocMonad m => LP m [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ char '{'
+ optional sp
+ keys <- manyTill citationLabel (char '}')
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+citationLabel :: PandocMonad m => LP m String
+citationLabel = optional sp *>
+ (many1 (satisfy isBibtexKeyChar)
+ <* optional sp
+ <* optional (char ',')
+ <* optional sp)
+ where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
+
+cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+
+citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+
+complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
+complexNatbibCitation mode = try $ do
+ let ils = (toList . trimInlines . mconcat) <$>
+ many (notFollowedBy (oneOf "\\};") >> inline)
+ let parseOne = try $ do
+ skipSpaces
+ pref <- ils
+ cit' <- inline -- expect a citation
+ let citlist = toList cit'
+ cits' <- case citlist of
+ [Cite cs _] -> return cs
+ _ -> mzero
+ suff <- ils
+ skipSpaces
+ optional $ char ';'
+ return $ addPrefix pref $ addSuffix suff cits'
+ (c:cits, raw) <- withRaw $ grouped parseOne
+ return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ raw)
+
+-- tables
+
+parseAligns :: PandocMonad m => LP m [(String, Alignment, String)]
+parseAligns = try $ do
+ char '{'
+ let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
+ maybeBar
+ let cAlign = AlignCenter <$ char 'c'
+ let lAlign = AlignLeft <$ char 'l'
+ let rAlign = AlignRight <$ char 'r'
+ let parAlign = AlignLeft <$ (char 'p' >> braced)
+ let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
+ let alignPrefix = char '>' >> braced
+ let alignSuffix = char '<' >> braced
+ let alignSpec = do
+ spaces
+ pref <- option "" alignPrefix
+ spaces
+ ch <- alignChar
+ spaces
+ suff <- option "" alignSuffix
+ return (pref, ch, suff)
+ aligns' <- sepEndBy alignSpec maybeBar
+ spaces
+ char '}'
+ spaces
+ return $ aligns'
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces'
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces'
+ optional $ bracketed (many1 (satisfy (/=']')))
+ return ()
+
+lbreak :: PandocMonad m => LP m ()
+lbreak = () <$ try (spaces' *>
+ (controlSeq "\\" <|> controlSeq "tabularnewline") <*
+ spaces')
+
+amp :: PandocMonad m => LP m ()
+amp = () <$ try (spaces' *> char '&' <* spaces')
+
+parseTableRow :: PandocMonad m
+ => Int -- ^ number of columns
+ -> [String] -- ^ prefixes
+ -> [String] -- ^ suffixes
+ -> LP m [Blocks]
+parseTableRow cols prefixes suffixes = try $ do
+ let tableCellRaw = many (notFollowedBy
+ (amp <|> lbreak <|>
+ (() <$ try (string "\\end"))) >> anyChar)
+ let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
+ env "minipage"
+ (skipopts *> spaces' *> optional braced *> spaces' *> blocks)
+ let tableCell = minipage <|>
+ ((plain . trimInlines . mconcat) <$> many inline)
+ rawcells <- sepBy1 tableCellRaw amp
+ guard $ length rawcells == cols
+ let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s)
+ rawcells prefixes suffixes
+ cells' <- mapM (parseFromString tableCell) rawcells'
+ let numcells = length cells'
+ guard $ numcells <= cols && numcells >= 1
+ guard $ cells' /= [mempty]
+ -- note: a & b in a three-column table leaves an empty 3rd cell:
+ let cells'' = cells' ++ replicate (cols - numcells) mempty
+ spaces'
+ return cells''
+
+spaces' :: PandocMonad m => LP m ()
+spaces' = spaces *> skipMany (comment *> spaces)
+
+simpTable :: PandocMonad m => Bool -> LP m Blocks
+simpTable hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ (spaces' >> tok)
+ skipopts
+ (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns
+ let cols = length aligns
+ optional $ controlSeq "caption" *> skipopts *> setCaption
+ optional lbreak
+ spaces'
+ skipMany hline
+ spaces'
+ header' <- option [] $ try (parseTableRow cols prefixes suffixes <*
+ lbreak <* many1 hline)
+ spaces'
+ rows <- sepEndBy (parseTableRow cols prefixes suffixes)
+ (lbreak <* optional (skipMany hline))
+ spaces'
+ optional $ controlSeq "caption" *> skipopts *> setCaption
+ optional lbreak
+ spaces'
+ let header'' = if null header'
+ then replicate cols mempty
+ else header'
+ lookAhead $ controlSeq "end" -- make sure we're at end
+ return $ table mempty (zip aligns (repeat 0)) header'' rows
+
+removeDoubleQuotes :: String -> String
+removeDoubleQuotes ('"':xs) =
+ case reverse xs of
+ '"':ys -> reverse ys
+ _ -> '"':xs
+removeDoubleQuotes xs = xs
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..80a1cd7a2
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,2119 @@
+{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Markdown
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
+
+import Data.List ( transpose, sortBy, findIndex, intercalate )
+import qualified Data.Map as M
+import Data.Scientific (coefficient, base10Exponent)
+import Data.Ord ( comparing )
+import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
+import Data.Maybe
+import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Generic (bottomUp)
+import qualified Data.Text as T
+import Data.Text (Text)
+import qualified Data.Yaml as Yaml
+import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..))
+import qualified Data.HashMap.Strict as H
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Data.Vector as V
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (charWidth)
+import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.Parsing hiding (tableWith)
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
+import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
+ isTextTag, isCommentTag )
+import Control.Monad
+import System.FilePath (takeExtension, addExtension)
+import Text.HTML.TagSoup
+import Data.Monoid ((<>))
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError, catchError)
+import Text.Pandoc.Class (PandocMonad, report)
+
+type MarkdownParser m = ParserT [Char] ParserState m
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readMarkdown opts s = do
+ parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
+--
+-- Constants and data structure definitions
+--
+
+isBulletListMarker :: Char -> Bool
+isBulletListMarker '*' = True
+isBulletListMarker '+' = True
+isBulletListMarker '-' = True
+isBulletListMarker _ = False
+
+isHruleChar :: Char -> Bool
+isHruleChar '*' = True
+isHruleChar '-' = True
+isHruleChar '_' = True
+isHruleChar _ = False
+
+setextHChars :: String
+setextHChars = "=-"
+
+isBlank :: Char -> Bool
+isBlank ' ' = True
+isBlank '\t' = True
+isBlank '\n' = True
+isBlank _ = False
+
+--
+-- auxiliary functions
+--
+
+-- | Succeeds when we're in list context.
+inList :: PandocMonad m => MarkdownParser m ()
+inList = do
+ ctx <- stateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+spnl :: PandocMonad m => ParserT [Char] st m ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+indentSpaces :: PandocMonad m => MarkdownParser m String
+indentSpaces = try $ do
+ tabStop <- getOption readerTabStop
+ count tabStop (char ' ') <|>
+ string "\t" <?> "indentation"
+
+nonindentSpaces :: PandocMonad m => MarkdownParser m String
+nonindentSpaces = do
+ tabStop <- getOption readerTabStop
+ sps <- many (char ' ')
+ if length sps < tabStop
+ then return sps
+ else unexpected "indented line"
+
+-- returns number of spaces parsed
+skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
+skipNonindentSpaces = do
+ tabStop <- getOption readerTabStop
+ atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ')
+
+atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int
+atMostSpaces n
+ | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0
+ | otherwise = return 0
+
+litChar :: PandocMonad m => MarkdownParser m Char
+litChar = escapedChar'
+ <|> characterReference
+ <|> noneOf "\n"
+ <|> try (newline >> notFollowedBy blankline >> return ' ')
+
+-- | Parse a sequence of inline elements between square brackets,
+-- including inlines between balanced pairs of square brackets.
+inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
+inlinesInBalancedBrackets = do
+ char '['
+ (_, raw) <- withRaw $ charsInBalancedBrackets 1
+ guard $ not $ null raw
+ parseFromString (trimInlinesF . mconcat <$> many inline) (init raw)
+
+charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m ()
+charsInBalancedBrackets 0 = return ()
+charsInBalancedBrackets openBrackets =
+ (char '[' >> charsInBalancedBrackets (openBrackets + 1))
+ <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1))
+ <|> (( (() <$ code)
+ <|> (() <$ (escapedChar'))
+ <|> (newline >> notFollowedBy blankline)
+ <|> skipMany1 (noneOf "[]`\n\\")
+ <|> (() <$ count 1 (oneOf "`\\"))
+ ) >> charsInBalancedBrackets openBrackets)
+
+--
+-- document structure
+--
+
+rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
+rawTitleBlockLine = do
+ char '%'
+ skipSpaces
+ first <- anyLine
+ rest <- many $ try $ do spaceChar
+ notFollowedBy blankline
+ skipSpaces
+ anyLine
+ return $ trim $ unlines (first:rest)
+
+titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
+titleLine = try $ do
+ raw <- rawTitleBlockLine
+ res <- parseFromString (many inline) raw
+ return $ trimInlinesF $ mconcat res
+
+authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
+authorsLine = try $ do
+ raw <- rawTitleBlockLine
+ let sep = (char ';' <* spaces) <|> newline
+ let pAuthors = sepEndBy
+ (trimInlinesF . mconcat <$> many
+ (try $ notFollowedBy sep >> inline))
+ sep
+ sequence <$> parseFromString pAuthors raw
+
+dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
+dateLine = try $ do
+ raw <- rawTitleBlockLine
+ res <- parseFromString (many inline) raw
+ return $ trimInlinesF $ mconcat res
+
+titleBlock :: PandocMonad m => MarkdownParser m ()
+titleBlock = pandocTitleBlock <|> mmdTitleBlock
+
+pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
+pandocTitleBlock = try $ do
+ guardEnabled Ext_pandoc_title_block
+ lookAhead (char '%')
+ title <- option mempty titleLine
+ author <- option (return []) authorsLine
+ date <- option mempty dateLine
+ optional blanklines
+ let meta' = do title' <- title
+ author' <- author
+ date' <- date
+ return $
+ (if B.isNull title' then id else B.setMeta "title" title')
+ . (if null author' then id else B.setMeta "author" author')
+ . (if B.isNull date' then id else B.setMeta "date" date')
+ $ nullMeta
+ updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+
+-- Adapted from solution at
+-- http://stackoverflow.com/a/29448764/1901888
+foldrWithKeyM :: Monad m => (k -> b -> a -> m a) -> a -> H.HashMap k b -> m a
+foldrWithKeyM f acc = H.foldrWithKey f' (return acc)
+ where
+ f' k b ma = ma >>= \a -> f k b a
+
+yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock = try $ do
+ guardEnabled Ext_yaml_metadata_block
+ pos <- getPosition
+ string "---"
+ blankline
+ notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
+ rawYamlLines <- manyTill anyLine stopLine
+ -- by including --- and ..., we allow yaml blocks with just comments:
+ let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
+ optional blanklines
+ opts <- stateOptions <$> getState
+ meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
+ Right (Yaml.Object hashmap) ->
+ foldrWithKeyM
+ (\k v m -> do
+ if ignorable k
+ then return m
+ else (do v' <- lift $ yamlToMeta opts v
+ return $ B.setMeta (T.unpack k) v' m)
+ `catchError`
+ (\_ -> return m)
+ ) nullMeta hashmap
+ Right Yaml.Null -> return nullMeta
+ Right _ -> do
+ logMessage $
+ CouldNotParseYamlMetadata "not an object"
+ pos
+ return nullMeta
+ Left err' -> do
+ case err' of
+ InvalidYaml (Just YamlParseException{
+ yamlProblem = problem
+ , yamlContext = _ctxt
+ , yamlProblemMark = Yaml.YamlMark {
+ yamlLine = yline
+ , yamlColumn = ycol
+ }}) ->
+ logMessage $ CouldNotParseYamlMetadata
+ problem (setSourceLine
+ (setSourceColumn pos
+ (sourceColumn pos + ycol))
+ (sourceLine pos + 1 + yline))
+ _ -> logMessage $ CouldNotParseYamlMetadata
+ (show err') pos
+ return nullMeta
+ updateState $ \st -> st{ stateMeta' = stateMeta' st <> (return meta') }
+ return mempty
+
+-- ignore fields ending with _
+ignorable :: Text -> Bool
+ignorable t = (T.pack "_") `T.isSuffixOf` t
+
+toMetaValue :: PandocMonad m => ReaderOptions -> Text -> m MetaValue
+toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
+ where
+ toMeta p =
+ case p of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
+ | endsWithNewline x -> MetaBlocks [Para xs]
+ | otherwise -> MetaInlines xs
+ Pandoc _ bs -> MetaBlocks bs
+ endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+ opts' = opts{readerExtensions =
+ disableExtension Ext_pandoc_title_block $
+ disableExtension Ext_mmd_title_block $
+ disableExtension Ext_yaml_metadata_block $
+ readerExtensions opts }
+
+yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue
+yamlToMeta opts (Yaml.String t) = toMetaValue opts t
+yamlToMeta _ (Yaml.Number n)
+ -- avoid decimal points for numbers that don't need them:
+ | base10Exponent n >= 0 = return $ MetaString $ show
+ $ coefficient n * (10 ^ base10Exponent n)
+ | otherwise = return $ MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
+ (V.toList xs)
+yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
+ if ignorable k
+ then m
+ else (do
+ v' <- yamlToMeta opts v
+ m' <- m
+ return (M.insert (T.unpack k) v' m')))
+ (return M.empty) o
+yamlToMeta _ _ = return $ MetaString ""
+
+stopLine :: PandocMonad m => MarkdownParser m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
+
+mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
+mmdTitleBlock = try $ do
+ guardEnabled Ext_mmd_title_block
+ firstPair <- kvPair False
+ restPairs <- many (kvPair True)
+ let kvPairs = firstPair : restPairs
+ blanklines
+ updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+ return (Meta $ M.fromList kvPairs) }
+
+kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
+kvPair allowEmpty = try $ do
+ key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ val <- trim <$> manyTill anyChar
+ (try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ guard $ allowEmpty || not (null val)
+ let key' = concat $ words $ map toLower key
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
+ return (key',val')
+
+parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
+parseMarkdown = do
+ optional titleBlock
+ blocks <- parseBlocks
+ st <- getState
+ let meta = runF (stateMeta' st) st
+ let Pandoc _ bs = B.doc $ runF blocks st
+ eastAsianLineBreaks <- option False $
+ True <$ guardEnabled Ext_east_asian_line_breaks
+ reportLogMessages
+ return $ (if eastAsianLineBreaks
+ then bottomUp softBreakFilter
+ else id) $ Pandoc meta bs
+
+softBreakFilter :: [Inline] -> [Inline]
+softBreakFilter (x:SoftBreak:y:zs) =
+ case (stringify x, stringify y) of
+ (xs@(_:_), (c:_))
+ | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+ _ -> x:SoftBreak:y:zs
+softBreakFilter xs = xs
+
+referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
+referenceKey = try $ do
+ pos <- getPosition
+ skipNonindentSpaces
+ (_,raw) <- reference
+ char ':'
+ skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
+ let sourceURL = liftM unwords $ many $ try $ do
+ skipMany spaceChar
+ notFollowedBy' referenceTitle
+ notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+ notFollowedBy' (() <$ reference)
+ many1 $ notFollowedBy space >> litChar
+ let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
+ src <- try betweenAngles <|> sourceURL
+ tit <- option "" referenceTitle
+ attr <- option nullAttr $ try $
+ guardEnabled Ext_link_attributes >> skipSpaces >> attributes
+ addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
+ >> many (try $ spnl >> keyValAttr)
+ blanklines
+ let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
+ target = (escapeURI $ trimr src, tit)
+ st <- getState
+ let oldkeys = stateKeys st
+ let key = toKey raw
+ case M.lookup key oldkeys of
+ Just _ -> logMessage $ DuplicateLinkReference raw pos
+ Nothing -> return ()
+ updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
+ return $ return mempty
+
+referenceTitle :: PandocMonad m => MarkdownParser m String
+referenceTitle = try $ do
+ skipSpaces >> optional newline >> skipSpaces
+ quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
+
+-- A link title in quotes
+quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
+quotedTitle c = try $ do
+ char c
+ notFollowedBy spaces
+ let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
+ let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
+ let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
+ unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
+
+-- | PHP Markdown Extra style abbreviation key. Currently
+-- we just skip them, since Pandoc doesn't have an element for
+-- an abbreviation.
+abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
+abbrevKey = do
+ guardEnabled Ext_abbreviations
+ try $ do
+ char '*'
+ reference
+ char ':'
+ skipMany (satisfy (/= '\n'))
+ blanklines
+ return $ return mempty
+
+noteMarker :: PandocMonad m => MarkdownParser m String
+noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
+
+rawLine :: PandocMonad m => MarkdownParser m String
+rawLine = try $ do
+ notFollowedBy blankline
+ notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
+ optional indentSpaces
+ anyLine
+
+rawLines :: PandocMonad m => MarkdownParser m String
+rawLines = do
+ first <- anyLine
+ rest <- many rawLine
+ return $ unlines (first:rest)
+
+noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+noteBlock = try $ do
+ pos <- getPosition
+ skipNonindentSpaces
+ ref <- noteMarker
+ char ':'
+ optional blankline
+ optional indentSpaces
+ first <- rawLines
+ rest <- many $ try $ blanklines >> indentSpaces >> rawLines
+ let raw = unlines (first:rest) ++ "\n"
+ optional blanklines
+ parsed <- parseFromString parseBlocks raw
+ let newnote = (ref, parsed)
+ oldnotes <- stateNotes' <$> getState
+ case lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
+ Nothing -> return ()
+ updateState $ \s -> s { stateNotes' = newnote : oldnotes }
+ return mempty
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: PandocMonad m => MarkdownParser m (F Blocks)
+block = do
+ pos <- getPosition
+ res <- choice [ mempty <$ blanklines
+ , codeBlockFenced
+ , yamlMetaBlock
+ -- note: bulletList needs to be before header because of
+ -- the possibility of empty list items: -
+ , bulletList
+ , header
+ , lhsCodeBlock
+ , divHtml
+ , htmlBlock
+ , table
+ , codeBlockIndented
+ , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+ , rawTeXBlock
+ , lineBlock
+ , blockQuote
+ , hrule
+ , orderedList
+ , definitionList
+ , noteBlock
+ , referenceKey
+ , abbrevKey
+ , para
+ , plain
+ ] <?> "block"
+ report $ ParsingTrace
+ (take 60 $ show $ B.toList $ runF res defaultParserState) pos
+ return res
+
+--
+-- header blocks
+--
+
+header :: PandocMonad m => MarkdownParser m (F Blocks)
+header = setextHeader <|> atxHeader <?> "header"
+
+atxChar :: PandocMonad m => MarkdownParser m Char
+atxChar = do
+ exts <- getOption readerExtensions
+ return $ if extensionEnabled Ext_literate_haskell exts
+ then '='
+ else '#'
+
+atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
+atxHeader = try $ do
+ level <- atxChar >>= many1 . char >>= return . length
+ notFollowedBy $ guardEnabled Ext_fancy_lists >>
+ (char '.' <|> char ')') -- this would be a list
+ skipSpaces
+ (text, raw) <- withRaw $
+ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
+ attr <- atxClosing
+ attr' <- registerHeader attr (runF text defaultParserState)
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw attr'
+ return $ B.headerWith attr' level <$> text
+
+atxClosing :: PandocMonad m => MarkdownParser m Attr
+atxClosing = try $ do
+ attr' <- option nullAttr
+ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
+ skipMany . char =<< atxChar
+ skipSpaces
+ attr <- option attr'
+ (guardEnabled Ext_header_attributes >> attributes)
+ blanklines
+ return attr
+
+setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
+setextHeaderEnd = try $ do
+ attr <- option nullAttr
+ $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
+ <|> (guardEnabled Ext_header_attributes >> attributes)
+ blanklines
+ return attr
+
+mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
+mmdHeaderIdentifier = do
+ ident <- stripFirstAndLast . snd <$> reference
+ skipSpaces
+ return (ident,[],[])
+
+setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
+setextHeader = try $ do
+ -- This lookahead prevents us from wasting time parsing Inlines
+ -- unless necessary -- it gives a significant performance boost.
+ lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
+ skipSpaces
+ (text, raw) <- withRaw $
+ trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
+ attr <- setextHeaderEnd
+ underlineChar <- oneOf setextHChars
+ many (char underlineChar)
+ blanklines
+ let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
+ attr' <- registerHeader attr (runF text defaultParserState)
+ guardDisabled Ext_implicit_header_references
+ <|> registerImplicitHeader raw attr'
+ return $ B.headerWith attr' level <$> text
+
+registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
+registerImplicitHeader raw attr@(ident, _, _) = do
+ let key = toKey $ "[" ++ raw ++ "]"
+ updateState (\s -> s { stateHeaderKeys =
+ M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
+
+--
+-- hrule block
+--
+
+hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
+hrule = try $ do
+ skipSpaces
+ start <- satisfy isHruleChar
+ count 2 (skipSpaces >> char start)
+ skipMany (spaceChar <|> char start)
+ newline
+ optional blanklines
+ return $ return B.horizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine :: PandocMonad m => MarkdownParser m String
+indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+
+blockDelimiter :: PandocMonad m
+ => (Char -> Bool)
+ -> Maybe Int
+ -> ParserT [Char] st m Int
+blockDelimiter f len = try $ do
+ c <- lookAhead (satisfy f)
+ case len of
+ Just l -> count l (char c) >> many (char c) >> return l
+ Nothing -> count 3 (char c) >> many (char c) >>=
+ return . (+ 3) . length
+
+attributes :: PandocMonad m => MarkdownParser m Attr
+attributes = try $ do
+ char '{'
+ spnl
+ attrs <- many (attribute <* spnl)
+ char '}'
+ return $ foldl (\x f -> f x) nullAttr attrs
+
+attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
+attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
+
+identifier :: PandocMonad m => MarkdownParser m String
+identifier = do
+ first <- letter
+ rest <- many $ alphaNum <|> oneOf "-_:."
+ return (first:rest)
+
+identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
+identifierAttr = try $ do
+ char '#'
+ result <- identifier
+ return $ \(_,cs,kvs) -> (result,cs,kvs)
+
+classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
+classAttr = try $ do
+ char '.'
+ result <- identifier
+ return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
+
+keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
+keyValAttr = try $ do
+ key <- identifier
+ char '='
+ val <- enclosed (char '"') (char '"') litChar
+ <|> enclosed (char '\'') (char '\'') litChar
+ <|> many (escapedChar' <|> noneOf " \t\n\r}")
+ return $ \(id',cs,kvs) ->
+ case key of
+ "id" -> (val,cs,kvs)
+ "class" -> (id',cs ++ words val,kvs)
+ _ -> (id',cs,kvs ++ [(key,val)])
+
+specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
+specialAttr = do
+ char '-'
+ return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
+
+codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
+codeBlockFenced = try $ do
+ c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+ <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
+ size <- blockDelimiter (== c) Nothing
+ skipMany spaceChar
+ attr <- option ([],[],[]) $
+ try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+ blankline
+ contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+ blanklines
+ return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+
+-- correctly handle github language identifiers
+toLanguageId :: String -> String
+toLanguageId = map toLower . go
+ where go "c++" = "cpp"
+ go "objective-c" = "objectivec"
+ go x = x
+
+codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
+codeBlockIndented = do
+ contents <- many1 (indentedLine <|>
+ try (do b <- blanklines
+ l <- indentedLine
+ return $ b ++ l))
+ optional blanklines
+ classes <- getOption readerIndentedCodeClasses
+ return $ return $ B.codeBlockWith ("", classes, []) $
+ stripTrailingNewlines $ concat contents
+
+lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+lhsCodeBlock = do
+ guardEnabled Ext_literate_haskell
+ (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
+ <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ lhsCodeBlockInverseBird)
+
+lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockLaTeX = try $ do
+ string "\\begin{code}"
+ manyTill spaceChar newline
+ contents <- many1Till anyChar (try $ string "\\end{code}")
+ blanklines
+ return $ stripTrailingNewlines contents
+
+lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
+
+lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
+
+lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
+lhsCodeBlockBirdWith c = try $ do
+ pos <- getPosition
+ when (sourceColumn pos /= 1) $ fail "Not in first column"
+ lns <- many1 $ birdTrackLine c
+ -- if (as is normal) there is always a space after >, drop it
+ let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
+ then map (drop 1) lns
+ else lns
+ blanklines
+ return $ intercalate "\n" lns'
+
+birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
+birdTrackLine c = try $ do
+ char c
+ -- allow html tags on left margin:
+ when (c == '<') $ notFollowedBy letter
+ anyLine
+
+--
+-- block quotes
+--
+
+emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
+emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
+
+emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
+emailBlockQuote = try $ do
+ emailBlockQuoteStart
+ let emailLine = many $ nonEndline <|> try
+ (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n')
+ let emailSep = try (newline >> emailBlockQuoteStart)
+ first <- emailLine
+ rest <- many $ try $ emailSep >> emailLine
+ let raw = first:rest
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
+
+blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
+blockQuote = do
+ raw <- emailBlockQuote
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ return $ B.blockQuote <$> contents
+
+--
+-- list blocks
+--
+
+bulletListStart :: PandocMonad m => MarkdownParser m ()
+bulletListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ startpos <- sourceColumn <$> getPosition
+ skipNonindentSpaces
+ notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
+ satisfy isBulletListMarker
+ endpos <- sourceColumn <$> getPosition
+ tabStop <- getOption readerTabStop
+ lookAhead (newline <|> spaceChar)
+ () <$ atMostSpaces (tabStop - (endpos - startpos))
+
+anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
+anyOrderedListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ startpos <- sourceColumn <$> getPosition
+ skipNonindentSpaces
+ notFollowedBy $ string "p." >> spaceChar >> digit -- page number
+ res <- do guardDisabled Ext_fancy_lists
+ start <- many1 digit >>= safeRead
+ char '.'
+ return (start, DefaultStyle, DefaultDelim)
+ <|> do (num, style, delim) <- anyOrderedListMarker
+ -- if it could be an abbreviated first name,
+ -- insist on more than one space
+ when (delim == Period && (style == UpperAlpha ||
+ (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
+ () <$ spaceChar
+ return (num, style, delim)
+ endpos <- sourceColumn <$> getPosition
+ tabStop <- getOption readerTabStop
+ lookAhead (newline <|> spaceChar)
+ atMostSpaces (tabStop - (endpos - startpos))
+ return res
+
+listStart :: PandocMonad m => MarkdownParser m ()
+listStart = bulletListStart <|> (anyOrderedListStart >> return ())
+
+listLine :: PandocMonad m => MarkdownParser m String
+listLine = try $ do
+ notFollowedBy' (do indentSpaces
+ many spaceChar
+ listStart)
+ notFollowedByHtmlCloser
+ optional (() <$ indentSpaces)
+ listLineCommon
+
+listLineCommon :: PandocMonad m => MarkdownParser m String
+listLineCommon = concat <$> manyTill
+ ( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+ <|> liftM snd (htmlTag isCommentTag)
+ <|> count 1 anyChar
+ ) newline
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: PandocMonad m
+ => MarkdownParser m a
+ -> MarkdownParser m String
+rawListItem start = try $ do
+ start
+ first <- listLineCommon
+ rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
+ blanks <- many blankline
+ return $ unlines (first:rest) ++ blanks
+
+-- continuation of a list item - indented and separated by blankline
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation :: PandocMonad m => MarkdownParser m String
+listContinuation = try $ do
+ lookAhead indentSpaces
+ result <- many1 listContinuationLine
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
+notFollowedByHtmlCloser = do
+ inHtmlBlock <- stateInHtmlBlock <$> getState
+ case inHtmlBlock of
+ Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
+ Nothing -> return ()
+
+listContinuationLine :: PandocMonad m => MarkdownParser m String
+listContinuationLine = try $ do
+ notFollowedBy blankline
+ notFollowedBy' listStart
+ notFollowedByHtmlCloser
+ optional indentSpaces
+ result <- anyLine
+ return $ result ++ "\n"
+
+listItem :: PandocMonad m
+ => MarkdownParser m a
+ -> MarkdownParser m (F Blocks)
+listItem start = try $ do
+ first <- rawListItem start
+ continuations <- many listContinuation
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may contain various block elements:
+ let raw = concat (first:continuations)
+ contents <- parseFromString parseBlocks raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return contents
+
+orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
+orderedList = try $ do
+ (start, style, delim) <- lookAhead anyOrderedListStart
+ unless (style `elem` [DefaultStyle, Decimal, Example] &&
+ delim `elem` [DefaultDelim, Period]) $
+ guardEnabled Ext_fancy_lists
+ when (style == Example) $ guardEnabled Ext_example_lists
+ items <- fmap sequence $ many1 $ listItem
+ ( try $ do
+ optional newline -- if preceded by Plain block in a list
+ startpos <- sourceColumn <$> getPosition
+ skipNonindentSpaces
+ res <- orderedListMarker style delim
+ endpos <- sourceColumn <$> getPosition
+ tabStop <- getOption readerTabStop
+ lookAhead (newline <|> spaceChar)
+ atMostSpaces (tabStop - (endpos - startpos))
+ return res )
+ start' <- option 1 $ guardEnabled Ext_startnum >> return start
+ return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
+
+bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
+bulletList = do
+ items <- fmap sequence $ many1 $ listItem bulletListStart
+ return $ B.bulletList <$> fmap compactify items
+
+-- definition lists
+
+defListMarker :: PandocMonad m => MarkdownParser m ()
+defListMarker = do
+ sps <- nonindentSpaces
+ char ':' <|> char '~'
+ tabStop <- getOption readerTabStop
+ let remaining = tabStop - (length sps + 1)
+ if remaining > 0
+ then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
+ else mzero
+ return ()
+
+definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
+definitionListItem compact = try $ do
+ rawLine' <- anyLine
+ raw <- many1 $ defRawBlock compact
+ term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
+ contents <- mapM (parseFromString parseBlocks . (++"\n")) raw
+ optional blanklines
+ return $ liftM2 (,) term (sequence contents)
+
+defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
+defRawBlock compact = try $ do
+ hasBlank <- option False $ blankline >> return True
+ defListMarker
+ firstline <- anyLine
+ let dline = try
+ ( do notFollowedBy blankline
+ notFollowedByHtmlCloser
+ if compact -- laziness not compatible with compact
+ then () <$ indentSpaces
+ else (() <$ indentSpaces)
+ <|> notFollowedBy defListMarker
+ anyLine )
+ rawlines <- many dline
+ cont <- liftM concat $ many $ try $ do
+ trailing <- option "" blanklines
+ ln <- indentSpaces >> notFollowedBy blankline >> anyLine
+ lns <- many dline
+ return $ trailing ++ unlines (ln:lns)
+ return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+ if hasBlank || not (null cont) then "\n\n" else ""
+
+definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
+definitionList = try $ do
+ lookAhead (anyLine >>
+ optional (blankline >> notFollowedBy (table >> return ())) >>
+ -- don't capture table caption as def list!
+ defListMarker)
+ compactDefinitionList <|> normalDefinitionList
+
+compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
+compactDefinitionList = do
+ guardEnabled Ext_compact_definition_lists
+ items <- fmap sequence $ many1 $ definitionListItem True
+ return $ B.definitionList <$> fmap compactifyDL items
+
+normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
+normalDefinitionList = do
+ guardEnabled Ext_definition_lists
+ items <- fmap sequence $ many1 $ definitionListItem False
+ return $ B.definitionList <$> items
+
+--
+-- paragraph block
+--
+
+para :: PandocMonad m => MarkdownParser m (F Blocks)
+para = try $ do
+ exts <- getOption readerExtensions
+ result <- trimInlinesF . mconcat <$> many1 inline
+ option (B.plain <$> result)
+ $ try $ do
+ newline
+ (blanklines >> return mempty)
+ <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+ <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+ <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+ <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+ -- Avoid creating a paragraph in a nested list.
+ notFollowedBy' inList >>
+ () <$ lookAhead listStart)
+ <|> do guardEnabled Ext_native_divs
+ inHtmlBlock <- stateInHtmlBlock <$> getState
+ case inHtmlBlock of
+ Just "div" -> () <$
+ lookAhead (htmlTag (~== TagClose "div"))
+ _ -> mzero
+ return $ do
+ result' <- result
+ case B.toList result' of
+ [Image attr alt (src,tit)]
+ | Ext_implicit_figures `extensionEnabled` exts ->
+ -- the fig: at beginning of title indicates a figure
+ return $ B.para $ B.singleton
+ $ Image attr alt (src,'f':'i':'g':':':tit)
+ _ -> return $ B.para result'
+
+plain :: PandocMonad m => MarkdownParser m (F Blocks)
+plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
+
+--
+-- raw html
+--
+
+htmlElement :: PandocMonad m => MarkdownParser m String
+htmlElement = rawVerbatimBlock
+ <|> strictHtmlBlock
+ <|> liftM snd (htmlTag isBlockTag)
+
+htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+htmlBlock = do
+ guardEnabled Ext_raw_html
+ try (do
+ (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
+ (guard (t `elem` ["pre","style","script"]) >>
+ (return . B.rawBlock "html") <$> rawVerbatimBlock)
+ <|> (do guardEnabled Ext_markdown_attribute
+ oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
+ markdownAttribute <-
+ case lookup "markdown" attrs of
+ Just "0" -> False <$ updateState (\st -> st{
+ stateMarkdownAttribute = False })
+ Just _ -> True <$ updateState (\st -> st{
+ stateMarkdownAttribute = True })
+ Nothing -> return oldMarkdownAttribute
+ res <- if markdownAttribute
+ then rawHtmlBlocks
+ else htmlBlock'
+ updateState $ \st -> st{ stateMarkdownAttribute =
+ oldMarkdownAttribute }
+ return res)
+ <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
+ <|> htmlBlock'
+
+htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+htmlBlock' = try $ do
+ first <- htmlElement
+ skipMany spaceChar
+ optional blanklines
+ return $ return $ B.rawBlock "html" first
+
+strictHtmlBlock :: PandocMonad m => MarkdownParser m String
+strictHtmlBlock = htmlInBalanced (not . isInlineTag)
+
+rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
+rawVerbatimBlock = htmlInBalanced isVerbTag
+ where isVerbTag (TagOpen "pre" _) = True
+ isVerbTag (TagOpen "style" _) = True
+ isVerbTag (TagOpen "script" _) = True
+ isVerbTag _ = False
+
+rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+rawTeXBlock = do
+ guardEnabled Ext_raw_tex
+ result <- (B.rawBlock "latex" . concat <$>
+ rawLaTeXBlock `sepEndBy1` blankline)
+ <|> (B.rawBlock "context" . concat <$>
+ rawConTeXtEnvironment `sepEndBy1` blankline)
+ spaces
+ return $ return result
+
+rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
+rawHtmlBlocks = do
+ (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ -- try to find closing tag
+ -- we set stateInHtmlBlock so that closing tags that can be either block or
+ -- inline will not be parsed as inline tags
+ oldInHtmlBlock <- stateInHtmlBlock <$> getState
+ updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
+ let closer = htmlTag (\x -> x ~== TagClose tagtype)
+ contents <- mconcat <$> many (notFollowedBy' closer >> block)
+ result <-
+ (closer >>= \(_, rawcloser) -> return (
+ return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
+ contents <>
+ return (B.rawBlock "html" rawcloser)))
+ <|> return (return (B.rawBlock "html" raw) <> contents)
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+ return result
+
+-- remove markdown="1" attribute
+stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
+ where filterAttrib (TagOpen t as) = TagOpen t
+ [(k,v) | (k,v) <- as, k /= "markdown"]
+ filterAttrib x = x
+
+--
+-- line block
+--
+
+lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
+lineBlock = try $ do
+ guardEnabled Ext_line_blocks
+ lines' <- lineBlockLines >>=
+ mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
+ return $ B.lineBlock <$> sequence lines'
+
+--
+-- Tables
+--
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine :: PandocMonad m
+ => Char
+ -> ParserT [Char] st m (Int, Int)
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ let lengthDashes = length dashes
+ lengthSp = length sp
+ return (lengthDashes, lengthDashes + lengthSp)
+
+-- Parse a table header with dashed lines of '-' preceded by
+-- one (or zero) line of text.
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
+simpleTableHeader headless = try $ do
+ rawContent <- if headless
+ then return ""
+ else anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines') = unzip dashes
+ let indices = scanl (+) (length initSp) lines'
+ -- If no header, calculate alignment on basis of first row of text
+ rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
+ if headless
+ then lookAhead anyLine
+ else return rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ let rawHeads' = if headless
+ then replicate (length dashes) ""
+ else rawHeads
+ heads <- fmap sequence
+ $ mapM (parseFromString (mconcat <$> many plain))
+ $ map trim rawHeads'
+ return (heads, aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String]
+ -> Int
+ -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len =
+ let nonempties = filter (not . null) $ map trimr strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter :: PandocMonad m => MarkdownParser m String
+tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep :: PandocMonad m => MarkdownParser m Char
+tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m [String]
+rawTableLine indices = do
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map trim $ tail $
+ splitStringByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
+tableLine indices = rawTableLine indices >>=
+ fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow :: PandocMonad m
+ => [Int]
+ -> MarkdownParser m (F [Blocks])
+multilineRow indices = do
+ colLines <- many1 (rawTableLine indices)
+ let cols = map unlines $ transpose colLines
+ fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+
+-- Parses a table caption: inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
+tableCaption = try $ do
+ guardEnabled Ext_table_captions
+ skipNonindentSpaces
+ string ":" <|> string "Table:"
+ trimInlinesF . mconcat <$> many1 inline <* blanklines
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+simpleTable headless = do
+ (aligns, _widths, heads', lines') <-
+ tableWith (simpleTableHeader headless) tableLine
+ (return ())
+ (if headless then tableFooter else tableFooter <|> blanklines)
+ -- Simple tables get 0s for relative column widths (i.e., use default)
+ return (aligns, replicate (length aligns) 0, heads', lines')
+
+-- Parse a multiline table: starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+multilineTable headless =
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
+
+multilineTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
+multilineTableHeader headless = try $ do
+ unless headless $
+ tableSep >> notFollowedBy blankline
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1 $ notFollowedBy tableSep >> anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines') = unzip dashes
+ let indices = scanl (+) (length initSp) lines'
+ rawHeadsList <- if headless
+ then liftM (map (:[]) . tail .
+ splitStringByIndices (init indices)) $ lookAhead anyLine
+ else return $ transpose $ map
+ (tail . splitStringByIndices (init indices))
+ rawContent
+ let aligns = zipWith alignType rawHeadsList lengths
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (unlines . map trim) rawHeadsList
+ heads <- fmap sequence $
+ mapM (parseFromString (mconcat <$> many plain)) $
+ map trim rawHeads
+ return (heads, aligns, indices)
+
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+gridTable :: PandocMonad m => Bool -- ^ Headerless table
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+gridTable headless =
+ tableWith (gridTableHeader headless) gridTableRow
+ (gridTableSep '-') gridTableFooter
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitStringByIndices (init indices) $ trimr line
+
+gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment)
+gridPart ch = do
+ leftColon <- option False (True <$ char ':')
+ dashes <- many1 (char ch)
+ rightColon <- option False (True <$ char ':')
+ char '+'
+ let lengthDashes = length dashes + (if leftColon then 1 else 0) +
+ (if rightColon then 1 else 0)
+ let alignment = case (leftColon, rightColon) of
+ (True, True) -> AlignCenter
+ (True, False) -> AlignLeft
+ (False, True) -> AlignRight
+ (False, False) -> AlignDefault
+ return ((lengthDashes, lengthDashes + 1), alignment)
+
+gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
+
+removeFinalBar :: String -> String
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table
+ -> MarkdownParser m (F [Blocks], [Alignment], [Int])
+gridTableHeader headless = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return []
+ else many1 (try (char '|' >> anyLine))
+ underDashes <- if headless
+ then return dashes
+ else gridDashedLines '='
+ guard $ length dashes == length underDashes
+ let lines' = map (snd . fst) underDashes
+ let indices = scanl (+) 0 lines'
+ let aligns = map snd underDashes
+ let rawHeads = if headless
+ then replicate (length underDashes) ""
+ else map (unlines . map trim) $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
+ return (heads, aligns, indices)
+
+gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- anyLine
+ return (gridTableSplitLine indices line)
+
+-- | Parse row of grid table.
+gridTableRow :: PandocMonad m => [Int]
+ -> MarkdownParser m (F [Blocks])
+gridTableRow indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+-- | Parse footer for a grid table.
+gridTableFooter :: PandocMonad m => MarkdownParser m [Char]
+gridTableFooter = blanklines
+
+pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
+pipeBreak = try $ do
+ nonindentSpaces
+ openPipe <- (True <$ char '|') <|> return False
+ first <- pipeTableHeaderPart
+ rest <- many $ sepPipe *> pipeTableHeaderPart
+ -- surrounding pipes needed for a one-column table:
+ guard $ not (null rest && not openPipe)
+ optional (char '|')
+ blankline
+ return $ unzip (first:rest)
+
+pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable = try $ do
+ nonindentSpaces
+ lookAhead nonspaceChar
+ (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
+ let heads' = take (length aligns) <$> heads
+ lines' <- many pipeTableRow
+ let lines'' = map (take (length aligns) <$>) lines'
+ let maxlength = maximum $
+ map (\x -> length . stringify $ runF x def) (heads' : lines'')
+ numColumns <- getOption readerColumns
+ let widths = if maxlength > numColumns
+ then map (\len ->
+ fromIntegral (len + 1) / fromIntegral numColumns)
+ seplengths
+ else replicate (length aligns) 0.0
+ return $ (aligns, widths, heads', sequence lines'')
+
+sepPipe :: PandocMonad m => MarkdownParser m ()
+sepPipe = try $ do
+ char '|' <|> char '+'
+ notFollowedBy blankline
+
+-- parse a row, also returning probable alignments for org-table cells
+pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
+pipeTableRow = try $ do
+ scanForPipe
+ skipMany spaceChar
+ openPipe <- (True <$ char '|') <|> return False
+ -- split into cells
+ let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
+ <|> void (noneOf "|\n\r")
+ let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
+ parseFromString pipeTableCell
+ cells <- cellContents `sepEndBy1` (char '|')
+ -- surrounding pipes needed for a one-column table:
+ guard $ not (length cells == 1 && not openPipe)
+ blankline
+ return $ sequence cells
+
+pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
+pipeTableCell = do
+ result <- many inline
+ if null result
+ then return mempty
+ else return $ B.plain . mconcat <$> sequence result
+
+pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
+pipeTableHeaderPart = try $ do
+ skipMany spaceChar
+ left <- optionMaybe (char ':')
+ pipe <- many1 (char '-')
+ right <- optionMaybe (char ':')
+ skipMany spaceChar
+ let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
+ return $
+ ((case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter), len)
+
+-- Succeed only if current line contains a pipe.
+scanForPipe :: PandocMonad m => ParserT [Char] st m ()
+scanForPipe = do
+ inp <- getInput
+ case break (\c -> c == '\n' || c == '|') inp of
+ (_,'|':_) -> return ()
+ _ -> mzero
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'. Variant of the version in
+-- Text.Pandoc.Parsing.
+tableWith :: PandocMonad m
+ => MarkdownParser m (F [Blocks], [Alignment], [Int])
+ -> ([Int] -> MarkdownParser m (F [Blocks]))
+ -> MarkdownParser m sep
+ -> MarkdownParser m end
+ -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
+tableWith headerParser rowParser lineParser footerParser = try $ do
+ (heads, aligns, indices) <- headerParser
+ lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ footerParser
+ numColumns <- getOption readerColumns
+ let widths = if (indices == [])
+ then replicate (length aligns) 0.0
+ else widthsFromIndices numColumns indices
+ return $ (aligns, widths, heads, lines')
+
+table :: PandocMonad m => MarkdownParser m (F Blocks)
+table = try $ do
+ frontCaption <- option Nothing (Just <$> tableCaption)
+ (aligns, widths, heads, lns) <-
+ try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable False) <|>
+ try (guardEnabled Ext_simple_tables >>
+ (simpleTable True <|> simpleTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ multilineTable True) <|>
+ try (guardEnabled Ext_grid_tables >>
+ (gridTable False <|> gridTable True)) <?> "table"
+ optional blanklines
+ caption <- case frontCaption of
+ Nothing -> option (return mempty) tableCaption
+ Just c -> return c
+ -- renormalize widths if greater than 100%:
+ let totalWidth = sum widths
+ let widths' = if totalWidth < 1
+ then widths
+ else map (/ totalWidth) widths
+ return $ do
+ caption' <- caption
+ heads' <- heads
+ lns' <- lns
+ return $ B.table caption' (zip aligns widths') heads' lns'
+
+--
+-- inline
+--
+
+inline :: PandocMonad m => MarkdownParser m (F Inlines)
+inline = choice [ whitespace
+ , bareURL
+ , str
+ , endline
+ , code
+ , strongOrEmph
+ , note
+ , cite
+ , bracketedSpan
+ , link
+ , image
+ , math
+ , strikeout
+ , subscript
+ , superscript
+ , inlineNote -- after superscript because of ^[link](/foo)^
+ , autoLink
+ , spanHtml
+ , rawHtmlInline
+ , escapedChar
+ , rawLaTeXInline'
+ , exampleRef
+ , smart
+ , return . B.singleton <$> charRef
+ , emoji
+ , symbol
+ , ltSign
+ ] <?> "inline"
+
+escapedChar' :: PandocMonad m => MarkdownParser m Char
+escapedChar' = try $ do
+ char '\\'
+ (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
+ <|> (guardEnabled Ext_angle_brackets_escapable >>
+ oneOf "\\`*_{}[]()>#+-.!~\"<>")
+ <|> (guardEnabled Ext_escaped_line_breaks >> char '\n')
+ <|> oneOf "\\`*_{}[]()>#+-.!~\""
+
+escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
+escapedChar = do
+ result <- escapedChar'
+ case result of
+ ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ '\n' -> guardEnabled Ext_escaped_line_breaks >>
+ return (return B.linebreak) -- "\[newline]" is a linebreak
+ _ -> return $ return $ B.str [result]
+
+ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
+ltSign = do
+ guardDisabled Ext_raw_html
+ <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
+ char '<'
+ return $ return $ B.str "<"
+
+exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
+exampleRef = try $ do
+ guardEnabled Ext_example_lists
+ char '@'
+ lab <- many1 (alphaNum <|> oneOf "-_")
+ return $ do
+ st <- askF
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
+
+symbol :: PandocMonad m => MarkdownParser m (F Inlines)
+symbol = do
+ result <- noneOf "<\\\n\t "
+ <|> try (do lookAhead $ char '\\'
+ notFollowedBy' (() <$ rawTeXBlock)
+ char '\\')
+ return $ return $ B.str [result]
+
+-- parses inline code, between n `s and n `s
+code :: PandocMonad m => MarkdownParser m (F Inlines)
+code = try $ do
+ starts <- many1 (char '`')
+ skipSpaces
+ result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (char '\n' >> notFollowedBy' blankline >> return " "))
+ (try (skipSpaces >> count (length starts) (char '`') >>
+ notFollowedBy (char '`')))
+ attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
+ >> attributes)
+ return $ return $ B.codeWith attr $ trim $ concat result
+
+math :: PandocMonad m => MarkdownParser m (F Inlines)
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
+ (guardEnabled Ext_smart *> (return <$> apostrophe)
+ <* notFollowedBy (space <|> satisfy isPunctuation))
+
+-- Parses material enclosed in *s, **s, _s, or __s.
+-- Designed to avoid backtracking.
+enclosure :: PandocMonad m
+ => Char
+ -> MarkdownParser m (F Inlines)
+enclosure c = do
+ -- we can't start an enclosure with _ if after a string and
+ -- the intraword_underscores extension is enabled:
+ guardDisabled Ext_intraword_underscores
+ <|> guard (c == '*')
+ <|> (guard =<< notAfterString)
+ cs <- many1 (char c)
+ (return (B.str cs) <>) <$> whitespace
+ <|> do
+ case length cs of
+ 3 -> three c
+ 2 -> two c mempty
+ 1 -> one c mempty
+ _ -> return (return $ B.str cs)
+
+ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
+ender c n = try $ do
+ count n (char c)
+ guard (c == '*')
+ <|> guardDisabled Ext_intraword_underscores
+ <|> notFollowedBy alphaNum
+
+-- Parse inlines til you hit one c or a sequence of two cs.
+-- If one c, emit emph and then parse two.
+-- If two cs, emit strong and then parse one.
+-- Otherwise, emit ccc then the results.
+three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
+three c = do
+ contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
+ (ender c 3 >> return ((B.strong . B.emph) <$> contents))
+ <|> (ender c 2 >> one c (B.strong <$> contents))
+ <|> (ender c 1 >> two c (B.emph <$> contents))
+ <|> return (return (B.str [c,c,c]) <> contents)
+
+-- Parse inlines til you hit two c's, and emit strong.
+-- If you never do hit two cs, emit ** plus inlines parsed.
+two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
+two c prefix' = do
+ contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
+ (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
+ <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+
+-- Parse inlines til you hit a c, and emit emph.
+-- If you never hit a c, emit * plus inlines parsed.
+one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
+one c prefix' = do
+ contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
+ <|> try (string [c,c] >>
+ notFollowedBy (ender c 1) >>
+ two c mempty) )
+ (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
+ <|> return (return (B.str [c]) <> (prefix' <> contents))
+
+strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
+strongOrEmph = enclosure '*' <|> enclosure '_'
+
+-- | Parses a list of inlines between start and end delimiters.
+inlinesBetween :: PandocMonad m
+ => (Show b)
+ => MarkdownParser m a
+ -> MarkdownParser m b
+ -> MarkdownParser m (F Inlines)
+inlinesBetween start end =
+ (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
+ innerSpace = try $ whitespace <* notFollowedBy' end
+
+strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
+strikeout = fmap B.strikeout <$>
+ (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
+ where strikeStart = string "~~" >> lookAhead nonspaceChar
+ >> notFollowedBy (char '~')
+ strikeEnd = try $ string "~~"
+
+superscript :: PandocMonad m => MarkdownParser m (F Inlines)
+superscript = fmap B.superscript <$> try (do
+ guardEnabled Ext_superscript
+ char '^'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
+
+subscript :: PandocMonad m => MarkdownParser m (F Inlines)
+subscript = fmap B.subscript <$> try (do
+ guardEnabled Ext_subscript
+ char '~'
+ mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
+
+whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
+whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+ where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
+ regsp = skipMany spaceChar >> return B.space
+
+nonEndline :: PandocMonad m => ParserT [Char] st m Char
+nonEndline = satisfy (/='\n')
+
+str :: PandocMonad m => MarkdownParser m (F Inlines)
+str = do
+ result <- many1 alphaNum
+ updateLastStrPos
+ let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
+ isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if isSmart
+ then case likelyAbbrev result of
+ [] -> return $ return $ B.str result
+ xs -> choice (map (\x ->
+ try (string x >> oneOf " \n" >>
+ lookAhead alphaNum >>
+ return (return $ B.str
+ $ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ return $ B.str result)
+ else return $ return $ B.str result
+
+-- | if the string matches the beginning of an abbreviation (before
+-- the first period, return strings that would finish the abbreviation.
+likelyAbbrev :: String -> [String]
+likelyAbbrev x =
+ let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
+ "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
+ "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
+ "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
+ "ch.", "sec.", "cf.", "cp."]
+ abbrPairs = map (break (=='.')) abbrevs
+ in map snd $ filter (\(y,_) -> y == x) abbrPairs
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: PandocMonad m => MarkdownParser m (F Inlines)
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts differently if in a list:
+ notFollowedBy (inList >> listStart)
+ guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
+ guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
+ guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header
+ guardDisabled Ext_backtick_code_blocks <|>
+ notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
+ notFollowedByHtmlCloser
+ (eof >> return mempty)
+ <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+ <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
+ <|> (skipMany spaceChar >> return (return B.softbreak))
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
+reference = do notFollowedBy' (string "[^") -- footnote reference
+ withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
+
+parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
+parenthesizedChars = do
+ result <- charsInBalanced '(' ')' litChar
+ return $ '(' : result ++ ")"
+
+-- source for a link, with optional title
+source :: PandocMonad m => MarkdownParser m (String, String)
+source = do
+ char '('
+ skipSpaces
+ let urlChunk =
+ try parenthesizedChars
+ <|> (notFollowedBy (oneOf " )") >> (count 1 litChar))
+ <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
+ let sourceURL = (unwords . words . concat) <$> many urlChunk
+ let betweenAngles = try $
+ char '<' >> manyTill litChar (char '>')
+ src <- try betweenAngles <|> sourceURL
+ tit <- option "" $ try $ spnl >> linkTitle
+ skipSpaces
+ char ')'
+ return (escapeURI $ trimr src, tit)
+
+linkTitle :: PandocMonad m => MarkdownParser m String
+linkTitle = quotedTitle '"' <|> quotedTitle '\''
+
+link :: PandocMonad m => MarkdownParser m (F Inlines)
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (lab,raw) <- reference
+ setState $ st{ stateAllowLinks = True }
+ regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
+
+bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
+bracketedSpan = try $ do
+ guardEnabled Ext_bracketed_spans
+ (lab,_) <- reference
+ attr <- attributes
+ let (ident,classes,keyvals) = attr
+ case lookup "style" keyvals of
+ Just s | null ident && null classes &&
+ map toLower (filter (`notElem` " \t;") s) ==
+ "font-variant:small-caps"
+ -> return $ B.smallcaps <$> lab
+ _ -> return $ B.spanWith attr <$> lab
+
+regLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> F Inlines
+ -> MarkdownParser m (F Inlines)
+regLink constructor lab = try $ do
+ (src, tit) <- source
+ attr <- option nullAttr $
+ guardEnabled Ext_link_attributes >> attributes
+ return $ constructor attr src tit <$> lab
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink :: PandocMonad m
+ => (Attr -> String -> String -> Inlines -> Inlines)
+ -> (F Inlines, String)
+ -> MarkdownParser m (F Inlines)
+referenceLink constructor (lab, raw) = do
+ sp <- (True <$ lookAhead (char ' ')) <|> return False
+ (_,raw') <- option (mempty, "") $
+ lookAhead (try (guardEnabled Ext_citations >>
+ spnl >> normalCite >> return (mempty, "")))
+ <|>
+ try (spnl >> reference)
+ when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
+ let labIsRef = raw' == "" || raw' == "[]"
+ let key = toKey $ if labIsRef then raw else raw'
+ parsedRaw <- parseFromString (mconcat <$> many inline) raw'
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ implicitHeaderRefs <- option False $
+ True <$ guardEnabled Ext_implicit_header_references
+ let makeFallback = do
+ parsedRaw' <- parsedRaw
+ fallback' <- fallback
+ return $ B.str "[" <> fallback' <> B.str "]" <>
+ (if sp && not (null raw) then B.space else mempty) <>
+ parsedRaw'
+ return $ do
+ keys <- asksF stateKeys
+ case M.lookup key keys of
+ Nothing ->
+ if implicitHeaderRefs
+ then do
+ headerKeys <- asksF stateHeaderKeys
+ case M.lookup key headerKeys of
+ Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
+ Nothing -> makeFallback
+ else makeFallback
+ Just ((src,tit), attr) -> constructor attr src tit <$> lab
+
+dropBrackets :: String -> String
+dropBrackets = reverse . dropRB . reverse . dropLB
+ where dropRB (']':xs) = xs
+ dropRB xs = xs
+ dropLB ('[':xs) = xs
+ dropLB xs = xs
+
+bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
+bareURL = try $ do
+ guardEnabled Ext_autolink_bare_uris
+ getState >>= guard . stateAllowLinks
+ (orig, src) <- uri <|> emailAddress
+ notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
+ return $ return $ B.link src "" (B.str orig)
+
+autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
+autoLink = try $ do
+ getState >>= guard . stateAllowLinks
+ char '<'
+ (orig, src) <- uri <|> emailAddress
+ -- in rare cases, something may remain after the uri parser
+ -- is finished, because the uri parser tries to avoid parsing
+ -- final punctuation. for example: in `<http://hi---there>`,
+ -- the URI parser will stop before the dashes.
+ extra <- fromEntities <$> manyTill nonspaceChar (char '>')
+ attr <- option nullAttr $ try $
+ guardEnabled Ext_link_attributes >> attributes
+ return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+
+image :: PandocMonad m => MarkdownParser m (F Inlines)
+image = try $ do
+ char '!'
+ (lab,raw) <- reference
+ defaultExt <- getOption readerDefaultImageExtension
+ let constructor attr' src = case takeExtension src of
+ "" -> B.imageWith attr' (addExtension src defaultExt)
+ _ -> B.imageWith attr' src
+ regLink constructor lab <|> referenceLink constructor (lab,raw)
+
+note :: PandocMonad m => MarkdownParser m (F Inlines)
+note = try $ do
+ guardEnabled Ext_footnotes
+ ref <- noteMarker
+ return $ do
+ notes <- asksF stateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Just contents -> do
+ st <- askF
+ -- process the note in a context that doesn't resolve
+ -- notes, to avoid infinite looping with notes inside
+ -- notes:
+ let contents' = runF contents st{ stateNotes' = [] }
+ return $ B.note contents'
+
+inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
+inlineNote = try $ do
+ guardEnabled Ext_inline_notes
+ char '^'
+ contents <- inlinesInBalancedBrackets
+ return $ B.note . B.para <$> contents
+
+rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
+rawLaTeXInline' = try $ do
+ guardEnabled Ext_raw_tex
+ lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
+ RawInline _ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s
+ -- "tex" because it might be context or latex
+
+rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
+rawConTeXtEnvironment = try $ do
+ string "\\start"
+ completion <- inBrackets (letter <|> digit <|> spaceChar)
+ <|> (many1 letter)
+ contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
+ (try $ string "\\stop" >> string completion)
+ return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
+
+inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
+inBrackets parser = do
+ char '['
+ contents <- many parser
+ char ']'
+ return $ "[" ++ contents ++ "]"
+
+spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
+spanHtml = try $ do
+ guardEnabled Ext_native_spans
+ (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
+ contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ case lookup "style" keyvals of
+ Just s | null ident && null classes &&
+ map toLower (filter (`notElem` " \t;") s) ==
+ "font-variant:small-caps"
+ -> return $ B.smallcaps <$> contents
+ _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
+
+divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
+divHtml = try $ do
+ guardEnabled Ext_native_divs
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ -- we set stateInHtmlBlock so that closing tags that can be either block or
+ -- inline will not be parsed as inline tags
+ oldInHtmlBlock <- stateInHtmlBlock <$> getState
+ updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+ bls <- option "" (blankline >> option "" blanklines)
+ contents <- mconcat <$>
+ many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose "div"))
+ if closed
+ then do
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ B.divWith (ident, classes, keyvals) <$> contents
+ else -- avoid backtracing
+ return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+
+rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
+rawHtmlInline = do
+ guardEnabled Ext_raw_html
+ inHtmlBlock <- stateInHtmlBlock <$> getState
+ let isCloseBlockTag t = case inHtmlBlock of
+ Just t' -> t ~== TagClose t'
+ Nothing -> False
+ mdInHtml <- option False $
+ ( guardEnabled Ext_markdown_in_html_blocks
+ <|> guardEnabled Ext_markdown_attribute
+ ) >> return True
+ (_,result) <- htmlTag $ if mdInHtml
+ then (\x -> isInlineTag x &&
+ not (isCloseBlockTag x))
+ else not . isTextTag
+ return $ return $ B.rawInline "html" result
+
+-- Emoji
+
+emojiChars :: [Char]
+emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
+
+emoji :: PandocMonad m => MarkdownParser m (F Inlines)
+emoji = try $ do
+ guardEnabled Ext_emoji
+ char ':'
+ emojikey <- many1 (oneOf emojiChars)
+ char ':'
+ case M.lookup emojikey emojis of
+ Just s -> return (return (B.str s))
+ Nothing -> mzero
+
+-- Citations
+
+cite :: PandocMonad m => MarkdownParser m (F Inlines)
+cite = do
+ guardEnabled Ext_citations
+ citations <- textualCite
+ <|> do (cs, raw) <- withRaw normalCite
+ return $ (flip B.cite (B.text raw)) <$> cs
+ return citations
+
+textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
+textualCite = try $ do
+ (_, key) <- citeKey
+ let first = Citation{ citationId = key
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = AuthorInText
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
+ case mbrest of
+ Just (rest, raw) ->
+ return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+ <$> rest
+ Nothing ->
+ (do
+ (cs, raw) <- withRaw $ bareloc first
+ let (spaces',raw') = span isSpace raw
+ spc | null spaces' = mempty
+ | otherwise = B.space
+ lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
+ fallback <- referenceLink B.linkWith (lab,raw')
+ return $ do
+ fallback' <- fallback
+ cs' <- cs
+ return $
+ case B.toList fallback' of
+ Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
+ _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
+ <|> return (do st <- askF
+ return $ case M.lookup key (stateExamples st) of
+ Just n -> B.str (show n)
+ _ -> B.cite [first] $ B.str $ '@':key)
+
+bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
+bareloc c = try $ do
+ spnl
+ char '['
+ notFollowedBy $ char '^'
+ suff <- suffix
+ rest <- option (return []) $ try $ char ';' >> citeList
+ spnl
+ char ']'
+ notFollowedBy $ oneOf "[("
+ return $ do
+ suff' <- suff
+ rest' <- rest
+ return $ c{ citationSuffix = B.toList suff' } : rest'
+
+normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
+normalCite = try $ do
+ char '['
+ spnl
+ citations <- citeList
+ spnl
+ char ']'
+ return citations
+
+suffix :: PandocMonad m => MarkdownParser m (F Inlines)
+suffix = try $ do
+ hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+ spnl
+ rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
+ return $ if hasSpace
+ then (B.space <>) <$> rest
+ else rest
+
+prefix :: PandocMonad m => MarkdownParser m (F Inlines)
+prefix = trimInlinesF . mconcat <$>
+ manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
+
+citeList :: PandocMonad m => MarkdownParser m (F [Citation])
+citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
+
+citation :: PandocMonad m => MarkdownParser m (F Citation)
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+smart :: PandocMonad m => MarkdownParser m (F Inlines)
+smart = do
+ guardEnabled Ext_smart
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return <$>) [apostrophe, dash, ellipses])
+
+singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
+ (fmap B.doubleQuoted . trimInlinesF $ contents))
+ <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
new file mode 100644
index 000000000..14f9da9b6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -0,0 +1,677 @@
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.MediaWiki
+ Copyright : Copyright (C) 2012-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of mediawiki text to 'Pandoc' document.
+-}
+{-
+TODO:
+_ correctly handle tables within tables
+_ parse templates?
+-}
+module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import Data.Monoid ((<>))
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
+import Text.Pandoc.XML ( fromEntities )
+import Text.Pandoc.Parsing hiding ( nested )
+import Text.Pandoc.Walk ( walk )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
+import Control.Monad
+import Data.List (intersperse, intercalate, isPrefixOf )
+import Text.HTML.TagSoup
+import Data.Sequence (viewl, ViewL(..), (<|))
+import qualified Data.Foldable as F
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Char (isDigit, isSpace)
+import Data.Maybe (fromMaybe)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad, report)
+
+-- | Read mediawiki from an input string and return a Pandoc document.
+readMediaWiki :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readMediaWiki opts s = do
+ parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
+ , mwMaxNestingLevel = 4
+ , mwNextLinkNumber = 1
+ , mwCategoryLinks = []
+ , mwHeaderMap = M.empty
+ , mwIdentifierList = Set.empty
+ }
+ (s ++ "\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+data MWState = MWState { mwOptions :: ReaderOptions
+ , mwMaxNestingLevel :: Int
+ , mwNextLinkNumber :: Int
+ , mwCategoryLinks :: [Inlines]
+ , mwHeaderMap :: M.Map Inlines String
+ , mwIdentifierList :: Set.Set String
+ }
+
+type MWParser m = ParserT [Char] MWState m
+
+instance HasReaderOptions MWState where
+ extractReaderOptions = mwOptions
+
+instance HasHeaderMap MWState where
+ extractHeaderMap = mwHeaderMap
+ updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st }
+
+instance HasIdentifierList MWState where
+ extractIdentifierList = mwIdentifierList
+ updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
+
+--
+-- auxiliary functions
+--
+
+-- This is used to prevent exponential blowups for things like:
+-- ''a'''a''a'''a''a'''a''a'''a
+nested :: PandocMonad m => MWParser m a -> MWParser m a
+nested p = do
+ nestlevel <- mwMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ mwMaxNestingLevel = nestlevel }
+ return res
+
+specialChars :: [Char]
+specialChars = "'[]<=&*{}|\":\\"
+
+spaceChars :: [Char]
+spaceChars = " \n\t"
+
+sym :: PandocMonad m => String -> MWParser m ()
+sym s = () <$ try (string s)
+
+newBlockTags :: [String]
+newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
+
+isBlockTag' :: Tag String -> Bool
+isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
+isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
+ t `notElem` eitherBlockOrInline
+isBlockTag' tag = isBlockTag tag
+
+isInlineTag' :: Tag String -> Bool
+isInlineTag' (TagComment _) = True
+isInlineTag' t = not (isBlockTag' t)
+
+eitherBlockOrInline :: [String]
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object"]
+
+htmlComment :: PandocMonad m => MWParser m ()
+htmlComment = () <$ htmlTag isCommentTag
+
+inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
+inlinesInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ if '/' `elem` raw -- self-closing tag
+ then return mempty
+ else trimInlines . mconcat <$>
+ manyTill inline (htmlTag (~== TagClose tag))
+
+blocksInTags :: PandocMonad m => String -> MWParser m Blocks
+blocksInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ let closer = if tag == "li"
+ then htmlTag (~== TagClose "li")
+ <|> lookAhead (
+ htmlTag (~== TagOpen "li" [])
+ <|> htmlTag (~== TagClose "ol")
+ <|> htmlTag (~== TagClose "ul"))
+ else htmlTag (~== TagClose tag)
+ if '/' `elem` raw -- self-closing tag
+ then return mempty
+ else mconcat <$> manyTill block closer
+
+charsInTags :: PandocMonad m => String -> MWParser m [Char]
+charsInTags tag = try $ do
+ (_,raw) <- htmlTag (~== TagOpen tag [])
+ if '/' `elem` raw -- self-closing tag
+ then return ""
+ else manyTill anyChar (htmlTag (~== TagClose tag))
+
+--
+-- main parser
+--
+
+parseMediaWiki :: PandocMonad m => MWParser m Pandoc
+parseMediaWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ categoryLinks <- reverse . mwCategoryLinks <$> getState
+ let categories = if null categoryLinks
+ then mempty
+ else B.para $ mconcat $ intersperse B.space categoryLinks
+ return $ B.doc $ bs <> categories
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => MWParser m Blocks
+block = do
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> table
+ <|> header
+ <|> hrule
+ <|> orderedList
+ <|> bulletList
+ <|> definitionList
+ <|> mempty <$ try (spaces *> htmlComment)
+ <|> preformatted
+ <|> blockTag
+ <|> (B.rawBlock "mediawiki" <$> template)
+ <|> para
+ report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ return res
+
+para :: PandocMonad m => MWParser m Blocks
+para = do
+ contents <- trimInlines . mconcat <$> many1 inline
+ if F.all (==Space) contents
+ then return mempty
+ else return $ B.para contents
+
+table :: PandocMonad m => MWParser m Blocks
+table = do
+ tableStart
+ styles <- option [] parseAttrs <* blankline
+ let tableWidth = case lookup "width" styles of
+ Just w -> fromMaybe 1.0 $ parseWidth w
+ Nothing -> 1.0
+ caption <- option mempty tableCaption
+ optional rowsep
+ hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!'))
+ (cellspecs',hdr) <- unzip <$> tableRow
+ let widths = map ((tableWidth *) . snd) cellspecs'
+ let restwidth = tableWidth - sum widths
+ let zerocols = length $ filter (==0.0) widths
+ let defaultwidth = if zerocols == 0 || zerocols == length widths
+ then 0.0
+ else restwidth / fromIntegral zerocols
+ let widths' = map (\w -> if w == 0 then defaultwidth else w) widths
+ let cellspecs = zip (map fst cellspecs') widths'
+ rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
+ optional blanklines
+ tableEnd
+ let cols = length hdr
+ let (headers,rows) = if hasheader
+ then (hdr, rows')
+ else (replicate cols mempty, hdr:rows')
+ return $ B.table caption cellspecs headers rows
+
+parseAttrs :: PandocMonad m => MWParser m [(String,String)]
+parseAttrs = many1 parseAttr
+
+parseAttr :: PandocMonad m => MWParser m (String, String)
+parseAttr = try $ do
+ skipMany spaceChar
+ k <- many1 letter
+ char '='
+ v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
+ <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
+ return (k,v)
+
+tableStart :: PandocMonad m => MWParser m ()
+tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|"
+
+tableEnd :: PandocMonad m => MWParser m ()
+tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}"
+
+rowsep :: PandocMonad m => MWParser m ()
+rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <*
+ optional parseAttr <* blanklines
+
+cellsep :: PandocMonad m => MWParser m ()
+cellsep = try $
+ (guardColumnOne *> skipSpaces <*
+ ( (char '|' <* notFollowedBy (oneOf "-}+"))
+ <|> (char '!')
+ )
+ )
+ <|> (() <$ try (string "||"))
+ <|> (() <$ try (string "!!"))
+
+tableCaption :: PandocMonad m => MWParser m Inlines
+tableCaption = try $ do
+ guardColumnOne
+ skipSpaces
+ sym "|+"
+ optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces)
+ (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline)
+
+tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
+tableRow = try $ skipMany htmlComment *> many tableCell
+
+tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
+tableCell = try $ do
+ cellsep
+ skipMany spaceChar
+ attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
+ notFollowedBy (char '|')
+ skipMany spaceChar
+ ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
+ ((snd <$> withRaw table) <|> count 1 anyChar))
+ bs <- parseFromString (mconcat <$> many block) ls
+ let align = case lookup "align" attrs of
+ Just "left" -> AlignLeft
+ Just "right" -> AlignRight
+ Just "center" -> AlignCenter
+ _ -> AlignDefault
+ let width = case lookup "width" attrs of
+ Just xs -> fromMaybe 0.0 $ parseWidth xs
+ Nothing -> 0.0
+ return ((align, width), bs)
+
+parseWidth :: String -> Maybe Double
+parseWidth s =
+ case reverse s of
+ ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
+ _ -> Nothing
+
+template :: PandocMonad m => MWParser m String
+template = try $ do
+ string "{{"
+ notFollowedBy (char '{')
+ lookAhead $ letter <|> digit <|> char ':'
+ let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
+ contents <- manyTill chunk (try $ string "}}")
+ return $ "{{" ++ concat contents ++ "}}"
+
+blockTag :: PandocMonad m => MWParser m Blocks
+blockTag = do
+ (tag, _) <- lookAhead $ htmlTag isBlockTag'
+ case tag of
+ TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
+ TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
+ TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
+ TagOpen "source" attrs -> syntaxhighlight "source" attrs
+ TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
+ charsInTags "haskell"
+ TagOpen "gallery" _ -> blocksInTags "gallery"
+ TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
+ TagClose "p" -> mempty <$ htmlTag (~== tag)
+ _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
+
+trimCode :: String -> String
+trimCode ('\n':xs) = stripTrailingNewlines xs
+trimCode xs = stripTrailingNewlines xs
+
+syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
+syntaxhighlight tag attrs = try $ do
+ let mblang = lookup "lang" attrs
+ let mbstart = lookup "start" attrs
+ let mbline = lookup "line" attrs
+ let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline
+ let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
+ contents <- charsInTags tag
+ return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
+
+hrule :: PandocMonad m => MWParser m Blocks
+hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline)
+
+guardColumnOne :: PandocMonad m => MWParser m ()
+guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
+preformatted :: PandocMonad m => MWParser m Blocks
+preformatted = try $ do
+ guardColumnOne
+ char ' '
+ let endline' = B.linebreak <$ (try $ newline <* char ' ')
+ let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
+ let spToNbsp ' ' = '\160'
+ spToNbsp x = x
+ let nowiki' = mconcat . intersperse B.linebreak . map B.str .
+ lines . fromEntities . map spToNbsp <$> try
+ (htmlTag (~== TagOpen "nowiki" []) *>
+ manyTill anyChar (htmlTag (~== TagClose "nowiki")))
+ let inline' = whitespace' <|> endline' <|> nowiki'
+ <|> (try $ notFollowedBy newline *> inline)
+ contents <- mconcat <$> many1 inline'
+ let spacesStr (Str xs) = all isSpace xs
+ spacesStr _ = False
+ if F.all spacesStr contents
+ then return mempty
+ else return $ B.para $ encode contents
+
+encode :: Inlines -> Inlines
+encode = B.fromList . normalizeCode . B.toList . walk strToCode
+ where strToCode (Str s) = Code ("",[],[]) s
+ strToCode Space = Code ("",[],[]) " "
+ strToCode x = x
+ normalizeCode [] = []
+ normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
+ normalizeCode $ (Code a1 (x ++ y)) : zs
+ normalizeCode (x:xs) = x : normalizeCode xs
+
+header :: PandocMonad m => MWParser m Blocks
+header = try $ do
+ guardColumnOne
+ eqs <- many1 (char '=')
+ let lev = length eqs
+ guard $ lev <= 6
+ contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
+ attr <- registerHeader nullAttr contents
+ return $ B.headerWith attr lev contents
+
+bulletList :: PandocMonad m => MWParser m Blocks
+bulletList = B.bulletList <$>
+ ( many1 (listItem '*')
+ <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
+ optional (htmlTag (~== TagClose "ul"))) )
+
+orderedList :: PandocMonad m => MWParser m Blocks
+orderedList =
+ (B.orderedList <$> many1 (listItem '#'))
+ <|> try
+ (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+ spaces
+ items <- many (listItem '#' <|> li)
+ optional (htmlTag (~== TagClose "ol"))
+ let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
+ return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
+
+definitionList :: PandocMonad m => MWParser m Blocks
+definitionList = B.definitionList <$> many1 defListItem
+
+defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks])
+defListItem = try $ do
+ terms <- mconcat . intersperse B.linebreak <$> many defListTerm
+ -- we allow dd with no dt, or dt with no dd
+ defs <- if B.isNull terms
+ then notFollowedBy
+ (try $ skipMany1 (char ':') >> string "<math>") *>
+ many1 (listItem ':')
+ else many (listItem ':')
+ return (terms, defs)
+
+defListTerm :: PandocMonad m => MWParser m Inlines
+defListTerm = char ';' >> skipMany spaceChar >> anyLine >>=
+ parseFromString (trimInlines . mconcat <$> many inline)
+
+listStart :: PandocMonad m => Char -> MWParser m ()
+listStart c = char c *> notFollowedBy listStartChar
+
+listStartChar :: PandocMonad m => MWParser m Char
+listStartChar = oneOf "*#;:"
+
+anyListStart :: PandocMonad m => MWParser m Char
+anyListStart = char '*'
+ <|> char '#'
+ <|> char ':'
+ <|> char ';'
+
+li :: PandocMonad m => MWParser m Blocks
+li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
+ (firstParaToPlain <$> blocksInTags "li") <* spaces
+
+listItem :: PandocMonad m => Char -> MWParser m Blocks
+listItem c = try $ do
+ extras <- many (try $ char c <* lookAhead listStartChar)
+ if null extras
+ then listItem' c
+ else do
+ skipMany spaceChar
+ first <- concat <$> manyTill listChunk newline
+ rest <- many
+ (try $ string extras *> lookAhead listStartChar *>
+ (concat <$> manyTill listChunk newline))
+ contents <- parseFromString (many1 $ listItem' c)
+ (unlines (first : rest))
+ case c of
+ '*' -> return $ B.bulletList contents
+ '#' -> return $ B.orderedList contents
+ ':' -> return $ B.definitionList [(mempty, contents)]
+ _ -> mzero
+
+-- The point of this is to handle stuff like
+-- * {{cite book
+-- | blah
+-- | blah
+-- }}
+-- * next list item
+-- which seems to be valid mediawiki.
+listChunk :: PandocMonad m => MWParser m String
+listChunk = template <|> count 1 anyChar
+
+listItem' :: PandocMonad m => Char -> MWParser m Blocks
+listItem' c = try $ do
+ listStart c
+ skipMany spaceChar
+ first <- concat <$> manyTill listChunk newline
+ rest <- many (try $ char c *> lookAhead listStartChar *>
+ (concat <$> manyTill listChunk newline))
+ parseFromString (firstParaToPlain . mconcat <$> many1 block)
+ $ unlines $ first : rest
+
+firstParaToPlain :: Blocks -> Blocks
+firstParaToPlain contents =
+ case viewl (B.unMany contents) of
+ (Para xs) :< ys -> B.Many $ (Plain xs) <| ys
+ _ -> contents
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => MWParser m Inlines
+inline = whitespace
+ <|> url
+ <|> str
+ <|> doubleQuotes
+ <|> strong
+ <|> emph
+ <|> image
+ <|> internalLink
+ <|> externalLink
+ <|> math
+ <|> inlineTag
+ <|> B.singleton <$> charRef
+ <|> inlineHtml
+ <|> (B.rawInline "mediawiki" <$> variable)
+ <|> (B.rawInline "mediawiki" <$> template)
+ <|> special
+
+str :: PandocMonad m => MWParser m Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
+
+math :: PandocMonad m => MWParser m Inlines
+math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
+ <|> (B.math . trim <$> charsInTags "math")
+ <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
+ <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+ where dmStart = string "\\["
+ dmEnd = try (string "\\]")
+ mStart = string "\\("
+ mEnd = try (string "\\)")
+
+variable :: PandocMonad m => MWParser m String
+variable = try $ do
+ string "{{{"
+ contents <- manyTill anyChar (try $ string "}}}")
+ return $ "{{{" ++ contents ++ "}}}"
+
+inlineTag :: PandocMonad m => MWParser m Inlines
+inlineTag = do
+ (tag, _) <- lookAhead $ htmlTag isInlineTag'
+ case tag of
+ TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+ TagOpen "nowiki" _ -> try $ do
+ (_,raw) <- htmlTag (~== tag)
+ if '/' `elem` raw
+ then return mempty
+ else B.text . fromEntities <$>
+ manyTill anyChar (htmlTag (~== TagClose "nowiki"))
+ TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
+ *> optional blankline)
+ TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
+ TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
+ TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
+ TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
+ TagOpen "code" _ -> encode <$> inlinesInTags "code"
+ TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
+ TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
+ _ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
+
+special :: PandocMonad m => MWParser m Inlines
+special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
+ oneOf specialChars)
+
+inlineHtml :: PandocMonad m => MWParser m Inlines
+inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
+
+whitespace :: PandocMonad m => MWParser m Inlines
+whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment)
+ <|> B.softbreak <$ endline
+
+endline :: PandocMonad m => MWParser m ()
+endline = () <$ try (newline <*
+ notFollowedBy spaceChar <*
+ notFollowedBy newline <*
+ notFollowedBy' hrule <*
+ notFollowedBy tableStart <*
+ notFollowedBy' header <*
+ notFollowedBy anyListStart)
+
+imageIdentifiers :: PandocMonad m => [MWParser m ()]
+imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
+ where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
+ "Bild"]
+
+image :: PandocMonad m => MWParser m Inlines
+image = try $ do
+ sym "[["
+ choice imageIdentifiers
+ fname <- addUnderscores <$> many1 (noneOf "|]")
+ _ <- many imageOption
+ dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px")
+ <|> return []
+ _ <- many imageOption
+ let kvs = case dims of
+ w:[] -> [("width", w)]
+ w:(h:[]) -> [("width", w), ("height", h)]
+ _ -> []
+ let attr = ("", [], kvs)
+ caption <- (B.str fname <$ sym "]]")
+ <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
+ return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
+
+imageOption :: PandocMonad m => MWParser m String
+imageOption = try $ char '|' *> opt
+ where
+ opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
+ , "thumb", "upright", "left", "right"
+ , "center", "none", "baseline", "sub"
+ , "super", "top", "text-top", "middle"
+ , "bottom", "text-bottom" ])
+ <|> try (string "frame")
+ <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+
+collapseUnderscores :: String -> String
+collapseUnderscores [] = []
+collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
+collapseUnderscores (x:xs) = x : collapseUnderscores xs
+
+addUnderscores :: String -> String
+addUnderscores = collapseUnderscores . intercalate "_" . words
+
+internalLink :: PandocMonad m => MWParser m Inlines
+internalLink = try $ do
+ sym "[["
+ pagename <- unwords . words <$> many (noneOf "|]")
+ label <- option (B.text pagename) $ char '|' *>
+ ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
+ -- the "pipe trick"
+ -- [[Help:Contents|] -> "Contents"
+ <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) )
+ sym "]]"
+ linktrail <- B.text <$> many letter
+ let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
+ if "Category:" `isPrefixOf` pagename
+ then do
+ updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
+ return mempty
+ else return link
+
+externalLink :: PandocMonad m => MWParser m Inlines
+externalLink = try $ do
+ char '['
+ (_, src) <- uri
+ lab <- try (trimInlines . mconcat <$>
+ (skipMany1 spaceChar *> manyTill inline (char ']')))
+ <|> do char ']'
+ num <- mwNextLinkNumber <$> getState
+ updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
+ return $ B.str $ show num
+ return $ B.link src "" lab
+
+url :: PandocMonad m => MWParser m Inlines
+url = do
+ (orig, src) <- uri
+ return $ B.link src "" (B.str orig)
+
+-- | Parses a list of inlines between start and end delimiters.
+inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
+inlinesBetween start end =
+ (trimInlines . mconcat) <$> try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
+ innerSpace = try $ whitespace <* notFollowedBy' end
+
+emph :: PandocMonad m => MWParser m Inlines
+emph = B.emph <$> nested (inlinesBetween start end)
+ where start = sym "''" >> lookAhead nonspaceChar
+ end = try $ notFollowedBy' (() <$ strong) >> sym "''"
+
+strong :: PandocMonad m => MWParser m Inlines
+strong = B.strong <$> nested (inlinesBetween start end)
+ where start = sym "'''" >> lookAhead nonspaceChar
+ end = try $ sym "'''"
+
+doubleQuotes :: PandocMonad m => MWParser m Inlines
+doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote)
+ where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar
+ closeDoubleQuote = try $ sym "\""
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
new file mode 100644
index 000000000..1953c0c83
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -0,0 +1,71 @@
+{-
+Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; Either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Native
+ Copyright : Copyright (C) 2011-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of a string representation of a pandoc type (@Pandoc@,
+@[Block]@, @Block@, @[Inline]@, or @Inline@) to a @Pandoc@ document.
+-}
+module Text.Pandoc.Readers.Native ( readNative ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Options (ReaderOptions)
+
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error
+import Text.Pandoc.Class
+
+-- | Read native formatted text and return a Pandoc document.
+-- The input may be a full pandoc document, a block list, a block,
+-- an inline list, or an inline. Thus, for example,
+--
+-- > Str "hi"
+--
+-- will be treated as if it were
+--
+-- > Pandoc nullMeta [Plain [Str "hi"]]
+--
+readNative :: PandocMonad m
+ => ReaderOptions
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readNative _ s =
+ case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "couldn't read native"
+
+readBlocks :: String -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
+
+readBlock :: String -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
+
+readInlines :: String -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
+
+readInline :: String -> Either PandocError Inline
+readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s)
+
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
new file mode 100644
index 000000000..cec64895c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -0,0 +1,103 @@
+{-# LANGUAGE FlexibleContexts #-}
+module Text.Pandoc.Readers.OPML ( readOPML ) where
+import Data.Char (toUpper)
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Readers.Markdown (readMarkdown)
+import Text.XML.Light
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Generics
+import Control.Monad.State
+import Data.Default
+import Text.Pandoc.Class (PandocMonad)
+
+type OPML m = StateT OPMLState m
+
+data OPMLState = OPMLState{
+ opmlSectionLevel :: Int
+ , opmlDocTitle :: Inlines
+ , opmlDocAuthors :: [Inlines]
+ , opmlDocDate :: Inlines
+ } deriving Show
+
+instance Default OPMLState where
+ def = OPMLState{ opmlSectionLevel = 0
+ , opmlDocTitle = mempty
+ , opmlDocAuthors = []
+ , opmlDocDate = mempty
+ }
+
+readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readOPML _ inp = do
+ (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp)
+ return $
+ setTitle (opmlDocTitle st') $
+ setAuthors (opmlDocAuthors st') $
+ setDate (opmlDocDate st') $
+ doc $ mconcat bs
+
+-- normalize input, consolidating adjacent Text and CRef elements
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)
+ where go :: [Content] -> [Content]
+ go (Text (CData CDataRaw _ _):xs) = xs
+ go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+ Text (CData CDataText (s1 ++ s2) z):xs
+ go (Text (CData CDataText s1 z):CRef r:xs) =
+ Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ go (CRef r:Text (CData CDataText s1 z):xs) =
+ Text (CData CDataText (convertEntity r ++ s1) z):xs
+ go (CRef r1:CRef r2:xs) =
+ Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ go xs = xs
+
+convertEntity :: String -> String
+convertEntity e = maybe (map toUpper e) id (lookupEntity e)
+
+-- convenience function to get an attribute value, defaulting to ""
+attrValue :: String -> Element -> String
+attrValue attr elt =
+ case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
+ Just z -> z
+ Nothing -> ""
+
+-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
+-- exceptT = either throwError return
+
+asHtml :: PandocMonad m => String -> OPML m Inlines
+asHtml s =
+ (\(Pandoc _ bs) -> case bs of
+ [Plain ils] -> fromList ils
+ _ -> mempty) <$> (lift $ readHtml def s)
+
+asMarkdown :: PandocMonad m => String -> OPML m Blocks
+asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s)
+
+getBlocks :: PandocMonad m => Element -> OPML m Blocks
+getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+
+parseBlock :: PandocMonad m => Content -> OPML m Blocks
+parseBlock (Elem e) =
+ case qName (elName e) of
+ "ownerName" -> mempty <$ modify (\st ->
+ st{opmlDocAuthors = [text $ strContent e]})
+ "dateModified" -> mempty <$ modify (\st ->
+ st{opmlDocDate = text $ strContent e})
+ "title" -> mempty <$ modify (\st ->
+ st{opmlDocTitle = text $ strContent e})
+ "outline" -> gets opmlSectionLevel >>= sect . (+1)
+ "?xml" -> return mempty
+ _ -> getBlocks e
+ where sect n = do headerText <- asHtml $ attrValue "text" e
+ noteBlocks <- asMarkdown $ attrValue "_note" e
+ modify $ \st -> st{ opmlSectionLevel = n }
+ bs <- getBlocks e
+ modify $ \st -> st{ opmlSectionLevel = n - 1 }
+ let headerText' = case map toUpper (attrValue "type" e) of
+ "LINK" -> link
+ (attrValue "url" e) "" headerText
+ _ -> headerText
+ return $ header n headerText' <> noteBlocks <> bs
+parseBlock _ = return mempty
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
new file mode 100644
index 000000000..ac22f2c09
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -0,0 +1,113 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Entry point to the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt ( readOdt ) where
+
+import Codec.Archive.Zip
+import qualified Text.XML.Light as XML
+
+import qualified Data.ByteString.Lazy as B
+
+import System.FilePath
+
+import Control.Monad.Except (throwError)
+
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.MediaBag
+import qualified Text.Pandoc.UTF8 as UTF8
+
+import Text.Pandoc.Readers.Odt.ContentReader
+import Text.Pandoc.Readers.Odt.StyleReader
+
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Shared (filteredFilesFromArchive)
+
+readOdt :: PandocMonad m
+ => ReaderOptions
+ -> B.ByteString
+ -> m Pandoc
+readOdt opts bytes = case readOdt' opts bytes of
+ Right (doc, mb) -> do
+ P.setMediaBag mb
+ return doc
+ Left e -> throwError e
+
+--
+readOdt' :: ReaderOptions
+ -> B.ByteString
+ -> Either PandocError (Pandoc, MediaBag)
+readOdt' _ bytes = bytesToOdt bytes-- of
+-- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag)
+-- Left err -> Left err
+
+--
+bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
+bytesToOdt bytes = case toArchiveOrFail bytes of
+ Right archive -> archiveToOdt archive
+ Left _ -> Left $ PandocParseError "Couldn't parse odt file."
+
+--
+archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
+archiveToOdt archive
+ | Just contentEntry <- findEntryByPath "content.xml" archive
+ , Just stylesEntry <- findEntryByPath "styles.xml" archive
+ , Just contentElem <- entryToXmlElem contentEntry
+ , Just stylesElem <- entryToXmlElem stylesEntry
+ , Right styles <- chooseMax (readStylesAt stylesElem )
+ (readStylesAt contentElem)
+ , media <- filteredFilesFromArchive archive filePathIsOdtMedia
+ , startState <- readerState styles media
+ , Right pandocWithMedia <- runConverter' read_body
+ startState
+ contentElem
+
+ = Right pandocWithMedia
+
+ | otherwise
+ -- Not very detailed, but I don't think more information would be helpful
+ = Left $ PandocParseError "Couldn't parse odt file."
+ where
+ filePathIsOdtMedia :: FilePath -> Bool
+ filePathIsOdtMedia fp =
+ let (dir, _) = splitFileName fp
+ in
+ (dir == "Pictures/")
+
+
+--
+entryToXmlElem :: Entry -> Maybe XML.Element
+entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
new file mode 100644
index 000000000..b056f1ecc
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -0,0 +1,253 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Arrows.State
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+An arrow that transports a state. It is in essence a more powerful version of
+the standard state monad. As it is such a simple extension, there are
+other version out there that do exactly the same.
+The implementation is duplicated, though, to add some useful features.
+Most of these might be implemented without access to innards, but it's much
+faster and easier to implement this way.
+-}
+
+module Text.Pandoc.Readers.Odt.Arrows.State where
+
+import Prelude hiding ( foldr, foldl )
+
+import qualified Control.Category as Cat
+import Control.Arrow
+import Control.Monad
+
+import Data.Foldable
+import Data.Monoid
+
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+
+
+newtype ArrowState state a b = ArrowState
+ { runArrowState :: (state, a) -> (state, b) }
+
+-- | Constructor
+withState :: (state -> a -> (state, b)) -> ArrowState state a b
+withState = ArrowState . uncurry
+
+-- | Constructor
+withState' :: ((state, a) -> (state, b)) -> ArrowState state a b
+withState' = ArrowState
+
+-- | Constructor
+modifyState :: (state -> state ) -> ArrowState state a a
+modifyState = ArrowState . first
+
+-- | Constructor
+ignoringState :: ( a -> b ) -> ArrowState state a b
+ignoringState = ArrowState . second
+
+-- | Constructor
+fromState :: (state -> (state, b)) -> ArrowState state a b
+fromState = ArrowState . (.fst)
+
+-- | Constructor
+extractFromState :: (state -> b ) -> ArrowState state x b
+extractFromState f = ArrowState $ \(state,_) -> (state, f state)
+
+-- | Constructor
+withUnchangedState :: (state -> a -> b ) -> ArrowState state a b
+withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a)
+
+-- | Constructor
+tryModifyState :: (state -> Either f state)
+ -> ArrowState state a (Either f a)
+tryModifyState f = ArrowState $ \(state,a)
+ -> (state,).Left ||| (,Right a) $ f state
+
+instance Cat.Category (ArrowState s) where
+ id = ArrowState id
+ arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1)
+
+instance Arrow (ArrowState state) where
+ arr = ignoringState
+ first a = ArrowState $ \(s,(aF,aS))
+ -> second (,aS) $ runArrowState a (s,aF)
+ second a = ArrowState $ \(s,(aF,aS))
+ -> second (aF,) $ runArrowState a (s,aS)
+
+instance ArrowChoice (ArrowState state) where
+ left a = ArrowState $ \(s,e) -> case e of
+ Left l -> second Left $ runArrowState a (s,l)
+ Right r -> (s, Right r)
+ right a = ArrowState $ \(s,e) -> case e of
+ Left l -> (s, Left l)
+ Right r -> second Right $ runArrowState a (s,r)
+
+instance ArrowLoop (ArrowState state) where
+ loop a = ArrowState $ \(s, x)
+ -> let (s', (x', _d)) = runArrowState a (s, (x, _d))
+ in (s', x')
+
+instance ArrowApply (ArrowState state) where
+ app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b)
+
+
+-- | Embedding of a state arrow in a state arrow with a different state type.
+switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y
+switchState there back a = ArrowState $ first there
+ >>> runArrowState a
+ >>> first back
+
+-- | Lift a state arrow to modify the state of an arrow
+-- with a different state type.
+liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x
+liftToState unlift a = modifyState $ unlift &&& id
+ >>> runArrowState a
+ >>> snd
+
+-- | Switches the type of the state temporarily.
+-- Drops the intermediate result state, behaving like the identity arrow,
+-- save for side effects in the state.
+withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x
+withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst
+
+-- | Switches the type of the state temporarily.
+-- Returns the resulting sub-state.
+withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s'
+withSubState' unlift a = ArrowState $ runArrowState unlift
+ >>> switch
+ >>> runArrowState a
+ >>> switch
+ where switch (x,y) = (y,x)
+
+-- | Switches the type of the state temporarily.
+-- Drops the intermediate result state, behaving like a fallible
+-- identity arrow, save for side effects in the state.
+withSubStateF :: ArrowState s x (Either f s')
+ -> ArrowState s' s (Either f s )
+ -> ArrowState s x (Either f x )
+withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a)
+ >>^ spreadChoice
+ >>^ fmap fst
+
+-- | Switches the type of the state temporarily.
+-- Returns the resulting sub-state.
+withSubStateF' :: ArrowState s x (Either f s')
+ -> ArrowState s' s (Either f s )
+ -> ArrowState s x (Either f s')
+withSubStateF' unlift a = ArrowState go
+ where go p@(s,_) = tryRunning unlift
+ ( tryRunning a (second Right) )
+ p
+ where tryRunning a' b v = case runArrowState a' v of
+ (_ , Left f) -> (s, Left f)
+ (x , Right y) -> b (y,x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results
+-- in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
+foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f
+ where a' x (s',m) = second (m <>) $ runArrowState a (s',x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results
+-- in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m
+foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f
+ where a' (s',m) x = second (m <>) $ runArrowState a (s',x)
+
+-- | Fold a fallible state arrow through something 'Foldable'. Collect the
+-- results in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+-- If the iteration fails, the state will be reset to the initial one.
+foldS' :: (Foldable f, Monoid m)
+ => ArrowState s x (Either e m)
+ -> ArrowState s (f x) (Either e m)
+foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f
+ where a' s x (s',Right m) = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'', Right (m <> m'))
+ (_ ,Left e ) -> (s , Left e)
+ a' _ _ e = e
+
+-- | Fold a fallible state arrow through something 'Foldable'. Collect the
+-- results in a 'Monoid'.
+-- Intermediate form of a fold between one with "only" a 'Monoid'
+-- and one with any function.
+-- If the iteration fails, the state will be reset to the initial one.
+foldSL' :: (Foldable f, Monoid m)
+ => ArrowState s x (Either e m)
+ -> ArrowState s (f x) (Either e m)
+foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f
+ where a' s (s',Right m) x = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'', Right (m <> m'))
+ (_ ,Left e ) -> (s , Left e)
+ a' _ e _ = e
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results in a
+-- 'MonadPlus'.
+iterateS :: (Foldable f, MonadPlus m)
+ => ArrowState s x y
+ -> ArrowState s (f x) (m y)
+iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
+ where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x)
+
+-- | Fold a state arrow through something 'Foldable'. Collect the results in a
+-- 'MonadPlus'.
+iterateSL :: (Foldable f, MonadPlus m)
+ => ArrowState s x y
+ -> ArrowState s (f x) (m y)
+iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+ where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x)
+
+
+-- | Fold a fallible state arrow through something 'Foldable'.
+-- Collect the results in a 'MonadPlus'.
+-- If the iteration fails, the state will be reset to the initial one.
+iterateS' :: (Foldable f, MonadPlus m)
+ => ArrowState s x (Either e y )
+ -> ArrowState s (f x) (Either e (m y))
+iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f
+ where a' s x (s',Right m) = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'',Right $ mplus m $ return m')
+ (_ ,Left e ) -> (s ,Left e )
+ a' _ _ e = e
+
+-- | Fold a fallible state arrow through something 'Foldable'.
+-- Collect the results in a 'MonadPlus'.
+-- If the iteration fails, the state will be reset to the initial one.
+iterateSL' :: (Foldable f, MonadPlus m)
+ => ArrowState s x (Either e y )
+ -> ArrowState s (f x) (Either e (m y))
+iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f
+ where a' s (s',Right m) x = case runArrowState a (s',x) of
+ (s'',Right m') -> (s'',Right $ mplus m $ return m')
+ (_ ,Left e ) -> (s ,Left e )
+ a' _ e _ = e
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
new file mode 100644
index 000000000..218a85661
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -0,0 +1,495 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Arrows.Utils
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Utility functions for Arrows (Kleisli monads).
+
+Some general notes on notation:
+
+* "^" is meant to stand for a pure function that is lifted into an arrow
+based on its usage for that purpose in "Control.Arrow".
+* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function
+with an equivalent return value.
+* "_" stands for the dropping of a value.
+-}
+
+-- We export everything
+module Text.Pandoc.Readers.Odt.Arrows.Utils where
+
+import Control.Arrow
+import Control.Monad ( join, MonadPlus(..) )
+
+import qualified Data.Foldable as F
+import Data.Monoid
+
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
+
+and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c')
+and2 = (&&&)
+
+and3 :: (Arrow a)
+ => a b c0->a b c1->a b c2
+ -> a b (c0,c1,c2 )
+and4 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3
+ -> a b (c0,c1,c2,c3 )
+and5 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4
+ -> a b (c0,c1,c2,c3,c4 )
+and6 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5
+ -> a b (c0,c1,c2,c3,c4,c5 )
+and7 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6
+ -> a b (c0,c1,c2,c3,c4,c5,c6 )
+and8 :: (Arrow a)
+ => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7
+ -> a b (c0,c1,c2,c3,c4,c5,c6,c7)
+
+and3 a b c = (and2 a b ) &&& c
+ >>^ \((z,y ) , x) -> (z,y,x )
+and4 a b c d = (and3 a b c ) &&& d
+ >>^ \((z,y,x ) , w) -> (z,y,x,w )
+and5 a b c d e = (and4 a b c d ) &&& e
+ >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v )
+and6 a b c d e f = (and5 a b c d e ) &&& f
+ >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u )
+and7 a b c d e f g = (and6 a b c d e f ) &&& g
+ >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t )
+and8 a b c d e f g h = (and7 a b c d e f g) &&& h
+ >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s)
+
+liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z
+liftA2 f a b = a &&& b >>^ uncurry f
+
+liftA3 :: (Arrow a) => (z->y->x -> r)
+ -> a b z->a b y->a b x
+ -> a b r
+liftA4 :: (Arrow a) => (z->y->x->w -> r)
+ -> a b z->a b y->a b x->a b w
+ -> a b r
+liftA5 :: (Arrow a) => (z->y->x->w->v -> r)
+ -> a b z->a b y->a b x->a b w->a b v
+ -> a b r
+liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u
+ -> a b r
+liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u->a b t
+ -> a b r
+liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r)
+ -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s
+ -> a b r
+
+liftA3 fun a b c = and3 a b c >>^ uncurry3 fun
+liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun
+liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun
+liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun
+liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun
+liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun
+
+liftA :: (Arrow a) => (y -> z) -> a b y -> a b z
+liftA fun a = a >>^ fun
+
+
+-- | Duplicate a value to subsequently feed it into different arrows.
+-- Can almost always be replaced with '(&&&)', 'keepingTheValue',
+-- or even '(|||)'.
+-- Aequivalent to
+-- > returnA &&& returnA
+duplicate :: (Arrow a) => a b (b,b)
+duplicate = arr $ join (,)
+
+-- | Lifts the combination of two values into an arrow.
+joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z
+joinOn = arr.uncurry
+
+-- | Applies a function to the uncurried result-pair of an arrow-application.
+-- (The %-symbol was chosen to evoke an association with pairs.)
+(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d
+a >>% f = a >>^ uncurry f
+
+-- | '(>>%)' with its arguments flipped
+(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d
+(%<<) = flip (>>%)
+
+-- | Precomposition with an uncurried function
+(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r
+f %>> a = uncurry f ^>> a
+
+-- | Precomposition with an uncurried function (right to left variant)
+(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r
+(<<%) = flip (%>>)
+
+infixr 2 >>%, %<<, %>>, <<%
+
+
+-- | Duplicate a value and apply an arrow to the second instance.
+-- Aequivalent to
+-- > \a -> duplicate >>> second a
+-- or
+-- > \a -> returnA &&& a
+keepingTheValue :: (Arrow a) => a b c -> a b (b,c)
+keepingTheValue a = returnA &&& a
+
+-- | Duplicate a value and apply an arrow to the first instance.
+-- Aequivalent to
+-- > \a -> duplicate >>> first a
+-- or
+-- > \a -> a &&& returnA
+keepingTheValue' :: (Arrow a) => a b c -> a b (c,b)
+keepingTheValue' a = a &&& returnA
+
+-- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'.
+-- Actually, it's the more complex '(>=>)', because 'bind' alone does not
+-- combine as nicely in arrow form.
+-- The current implementation is not the most efficient one, because it can
+-- not return directly if a 'Nothing' is encountered. That in turn follows
+-- from the type system, as 'Nothing' has an "invisible" type parameter that
+-- can not be dropped early.
+--
+-- Also, there probably is a way to generalize this to other monads
+-- or applicatives, but I'm leaving that as an exercise to the reader.
+-- I have a feeling there is a new Arrow-typeclass to be found that is less
+-- restrictive than 'ArrowApply'. If it is already out there,
+-- I have not seen it yet. ('ArrowPlus' for example is not general enough.)
+(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c)
+a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join
+
+infixr 2 >>>=
+
+-- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required.
+-- (But still different from a true bind)
+(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b)
+(>++<) = liftA2 mplus
+
+-- | Left-compose with a pure function
+leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r)
+leftLift = left.arr
+
+-- | Right-compose with a pure function
+rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r')
+rightLift = right.arr
+
+
+( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c')
+( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c')
+( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c')
+
+l ^+++ r = leftLift l >>> right r
+l +++^ r = left l >>> rightLift r
+l ^+++^ r = leftLift l >>> rightLift r
+
+infixr 2 ^+++, +++^, ^+++^
+
+( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d
+( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d
+( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d
+
+l ^||| r = arr l ||| r
+l |||^ r = l ||| arr r
+l ^|||^ r = arr l ||| arr r
+
+infixr 2 ^||| , |||^, ^|||^
+
+( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c')
+( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c')
+( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c')
+
+l ^&&& r = arr l &&& r
+l &&&^ r = l &&& arr r
+l ^&&&^ r = arr l &&& arr r
+
+infixr 3 ^&&&, &&&^, ^&&&^
+
+( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c')
+( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c')
+( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c')
+
+l ^*** r = arr l *** r
+l ***^ r = l *** arr r
+l ^***^ r = arr l *** arr r
+
+infixr 3 ^***, ***^, ^***^
+
+-- | A version of
+--
+-- >>> \p -> arr (\x -> if p x the Right x else Left x)
+--
+-- but with p being an arrow
+choose :: (ArrowChoice a) => a b Bool -> a b (Either b b)
+choose checkValue = keepingTheValue checkValue >>^ select
+ where select (x,True ) = Right x
+ select (x,False ) = Left x
+
+-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@.
+choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r)
+choiceToMaybe = arr eitherToMaybe
+
+-- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@.
+maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b)
+maybeToChoice = arr maybeToEither
+
+-- | Lifts a constant value into an arrow
+returnV :: (Arrow a) => c -> a x c
+returnV = arr.const
+
+-- | 'returnA' dropping everything
+returnA_ :: (Arrow a) => a _b ()
+returnA_ = returnV ()
+
+-- | Wrapper for an arrow that can be evaluated im parallel. All
+-- Arrows can be evaluated in parallel, as long as they return a
+-- monoid.
+newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c }
+ deriving (Eq, Ord, Show)
+
+instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where
+ mempty = CoEval $ returnV mempty
+ (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend
+
+-- | Evaluates a collection of arrows in a parallel fashion.
+--
+-- This is in essence a fold of '(&&&)' over the collection,
+-- so the actual execution order and parallelity depends on the
+-- implementation of '(&&&)' in the arrow in question.
+-- The default implementation of '(&&&)' for example keeps the
+-- order as given in the collection.
+--
+-- This function can be seen as a generalization of
+-- 'Control.Applicative.sequenceA' to arrows or as an alternative to
+-- a fold with 'Control.Applicative.WrappedArrow', which
+-- substitutes the monoid with function application.
+--
+coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m
+coEval = evalParallelArrow . (F.foldMap CoEval)
+
+-- | Defines Left as failure, Right as success
+type FallibleArrow a input failure success = a input (Either failure success)
+
+type ReFallibleArrow a failure success success'
+ = FallibleArrow a (Either failure success) failure success'
+
+-- | Wrapper for fallible arrows. Fallible arrows are all arrows that return
+-- an Either value where left is a faliure and right is a success value.
+newtype AlternativeArrow a input failure success
+ = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success }
+
+
+instance (ArrowChoice a, Monoid failure)
+ => Monoid (AlternativeArrow a input failure success) where
+ mempty = TryArrow $ returnV $ Left mempty
+ (TryArrow a) `mappend` (TryArrow b)
+ = TryArrow $ a &&& b
+ >>^ \(a',~b')
+ -> ( (\a'' -> left (mappend a'') b') ||| Right )
+ a'
+
+-- | Evaluates a collection of fallible arrows, trying each one in succession.
+-- Left values are interpreted as failures, right values as successes.
+--
+-- The evaluation is stopped once an arrow succeeds.
+-- Up to that point, all failures are collected in the failure-monoid.
+-- Note that '()' is a monoid, and thus can serve as a failure-collector if
+-- you are uninterested in the exact failures.
+--
+-- This is in essence a fold of '(&&&)' over the collection, enhanced with a
+-- little bit of repackaging, so the actual execution order depends on the
+-- implementation of '(&&&)' in the arrow in question.
+-- The default implementation of '(&&&)' for example keeps the
+-- order as given in the collection.
+--
+tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure)
+ => f (FallibleArrow a b failure success)
+ -> FallibleArrow a b failure success
+tryArrows = evalAlternativeArrow . (F.foldMap TryArrow)
+
+--
+liftSuccess :: (ArrowChoice a)
+ => (success -> success')
+ -> ReFallibleArrow a failure success success'
+liftSuccess = rightLift
+
+--
+liftAsSuccess :: (ArrowChoice a)
+ => a x success
+ -> FallibleArrow a x failure success
+liftAsSuccess a = a >>^ Right
+
+--
+asFallibleArrow :: (ArrowChoice a)
+ => a x success
+ -> FallibleArrow a x failure success
+asFallibleArrow a = a >>^ Right
+
+-- | Raises an error into a 'ReFallibleArrow' if the arrow is already in
+-- "error mode"
+liftError :: (ArrowChoice a, Monoid failure)
+ => failure
+ -> ReFallibleArrow a failure success success
+liftError e = leftLift (e <>)
+
+-- | Raises an error into a 'FallibleArrow', droping both the arrow input
+-- and any previously stored error value.
+_raiseA :: (ArrowChoice a)
+ => failure
+ -> FallibleArrow a x failure success
+_raiseA e = returnV (Left e)
+
+-- | Raises an empty error into a 'FallibleArrow', droping both the arrow input
+-- and any previously stored error value.
+_raiseAEmpty :: (ArrowChoice a, Monoid failure)
+ => FallibleArrow a x failure success
+_raiseAEmpty = _raiseA mempty
+
+-- | Raises an error into a 'ReFallibleArrow', possibly appending the new error
+-- to an existing one
+raiseA :: (ArrowChoice a, Monoid failure)
+ => failure
+ -> ReFallibleArrow a failure success success
+raiseA e = arr $ Left.(either (<> e) (const e))
+
+-- | Raises an empty error into a 'ReFallibleArrow'. If there already is an
+-- error, nothing changes.
+-- (Note that this function is only aequivalent to @raiseA mempty@ iff the
+-- failure monoid follows the monoid laws.)
+raiseAEmpty :: (ArrowChoice a, Monoid failure)
+ => ReFallibleArrow a failure success success
+raiseAEmpty = arr (fromRight (const mempty) >>> Left)
+
+
+-- | Execute the second arrow if the first succeeds
+(>>?) :: (ArrowChoice a)
+ => FallibleArrow a x failure success
+ -> FallibleArrow a success failure success'
+ -> FallibleArrow a x failure success'
+a >>? b = a >>> Left ^||| b
+
+-- | Execute the lifted second arrow if the first succeeds
+(>>?^) :: (ArrowChoice a)
+ => FallibleArrow a x failure success
+ -> (success -> success')
+ -> FallibleArrow a x failure success'
+a >>?^ f = a >>^ Left ^|||^ Right . f
+
+-- | Execute the lifted second arrow if the first succeeds
+(>>?^?) :: (ArrowChoice a)
+ => FallibleArrow a x failure success
+ -> (success -> Either failure success')
+ -> FallibleArrow a x failure success'
+a >>?^? b = a >>> Left ^|||^ b
+
+-- | Execute the second arrow if the lifted first arrow succeeds
+(^>>?) :: (ArrowChoice a)
+ => (x -> Either failure success)
+ -> FallibleArrow a success failure success'
+ -> FallibleArrow a x failure success'
+a ^>>? b = a ^>> Left ^||| b
+
+-- | Execute the lifted second arrow if the lifted first arrow succeeds
+(^>>?^) :: (ArrowChoice a)
+ => (x -> Either failure success)
+ -> (success -> success')
+ -> FallibleArrow a x failure success'
+a ^>>?^ f = arr $ a >>> right f
+
+-- | Execute the lifted second arrow if the lifted first arrow succeeds
+(^>>?^?) :: (ArrowChoice a)
+ => (x -> Either failure success)
+ -> (success -> Either failure success')
+ -> FallibleArrow a x failure success'
+a ^>>?^? f = a ^>> Left ^|||^ f
+
+-- | Execute the second, non-fallible arrow if the first arrow succeeds
+(>>?!) :: (ArrowChoice a)
+ => FallibleArrow a x failure success
+ -> a success success'
+ -> FallibleArrow a x failure success'
+a >>?! f = a >>> right f
+
+---
+(>>?%) :: (ArrowChoice a)
+ => FallibleArrow a x f (b,b')
+ -> (b -> b' -> c)
+ -> FallibleArrow a x f c
+a >>?% f = a >>?^ (uncurry f)
+
+---
+(^>>?%) :: (ArrowChoice a)
+ => (x -> Either f (b,b'))
+ -> (b -> b' -> c)
+ -> FallibleArrow a x f c
+a ^>>?% f = arr a >>?^ (uncurry f)
+
+---
+(>>?%?) :: (ArrowChoice a)
+ => FallibleArrow a x f (b,b')
+ -> (b -> b' -> (Either f c))
+ -> FallibleArrow a x f c
+a >>?%? f = a >>?^? (uncurry f)
+
+infixr 1 >>?, >>?^, >>?^?
+infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?!
+infixr 1 >>?%, ^>>?%, >>?%?
+
+-- | Keep values that are Right, replace Left values by a constant.
+ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v
+ifFailedUse v = arr $ either (const v) id
+
+-- | '(&&)' lifted into an arrow
+(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
+(<&&>) = liftA2 (&&)
+
+-- | '(||)' lifted into an arrow
+(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool
+(<||>) = liftA2 (||)
+
+-- | An equivalent of '(&&)' in a fallible arrow
+(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s
+ -> FallibleArrow a x f s'
+ -> FallibleArrow a x f (s,s')
+(>&&<) = liftA2 chooseMin
+
+-- | An equivalent of '(||)' in some forms of fallible arrows
+(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s
+ -> FallibleArrow a x f s
+ -> FallibleArrow a x f s
+(>||<) = liftA2 chooseMax
+
+-- | An arrow version of a short-circuit (<|>)
+ifFailedDo :: (ArrowChoice a)
+ => FallibleArrow a x f y
+ -> FallibleArrow a x f y
+ -> FallibleArrow a x f y
+ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right)
+ where repackage (x , Left _) = Left x
+ repackage (_ , Right y) = Right y
+
+infixr 4 <&&>, <||>, >&&<, >||<
+infixr 1 `ifFailedDo`
+
+
diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs
new file mode 100644
index 000000000..1f095bade
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Base.hs
@@ -0,0 +1,43 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Base
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Core types of the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt.Base where
+
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Namespaces
+
+type OdtConverterState s = XMLConverterState Namespace s
+
+type XMLReader s a b = FallibleXMLConverter Namespace s a b
+
+type XMLReaderSafe s a b = XMLConverter Namespace s a b
+
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
new file mode 100644
index 000000000..a1bd8cb59
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -0,0 +1,929 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE RecordWildCards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.ContentReader
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+The core of the odt reader that converts odt features into Pandoc types.
+-}
+
+module Text.Pandoc.Readers.Odt.ContentReader
+( readerState
+, read_body
+) where
+
+import Control.Arrow
+import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Map as M
+import Data.List ( find, intercalate )
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
+import Text.Pandoc.Shared
+
+import Text.Pandoc.Readers.Odt.Base
+import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.StyleReader
+
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.Utils
+
+import qualified Data.Set as Set
+
+--------------------------------------------------------------------------------
+-- State
+--------------------------------------------------------------------------------
+
+type Anchor = String
+type Media = [(FilePath, B.ByteString)]
+
+data ReaderState
+ = ReaderState { -- | A collection of styles read somewhere else.
+ -- It is only queried here, not modified.
+ styleSet :: Styles
+ -- | A stack of the styles of parent elements.
+ -- Used to look up inherited style properties.
+ , styleTrace :: [Style]
+ -- | Keeps track of the current depth in nested lists
+ , currentListLevel :: ListLevel
+ -- | Lists may provide their own style, but they don't have
+ -- to. If they do not, the style of a parent list may be used
+ -- or even a default list style from the paragraph style.
+ -- This value keeps track of the closest list style there
+ -- currently is.
+ , currentListStyle :: Maybe ListStyle
+ -- | A map from internal anchor names to "pretty" ones.
+ -- The mapping is a purely cosmetic one.
+ , bookmarkAnchors :: M.Map Anchor Anchor
+ -- | A map of files / binary data from the archive
+ , envMedia :: Media
+ -- | Hold binary resources used in the document
+ , odtMediaBag :: MediaBag
+-- , sequences
+-- , trackedChangeIDs
+ }
+ deriving ( Show )
+
+readerState :: Styles -> Media -> ReaderState
+readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty
+
+--
+pushStyle' :: Style -> ReaderState -> ReaderState
+pushStyle' style state = state { styleTrace = style : styleTrace state }
+
+--
+popStyle' :: ReaderState -> ReaderState
+popStyle' state = case styleTrace state of
+ _:trace -> state { styleTrace = trace }
+ _ -> state
+
+--
+modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState)
+modifyListLevel f state = state { currentListLevel = f (currentListLevel state) }
+
+--
+shiftListLevel :: ListLevel -> (ReaderState -> ReaderState)
+shiftListLevel diff = modifyListLevel (+ diff)
+
+--
+swapCurrentListStyle :: Maybe ListStyle -> ReaderState
+ -> (ReaderState, Maybe ListStyle)
+swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle }
+ , currentListStyle state
+ )
+
+--
+lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor
+lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors
+
+--
+putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState
+putPrettyAnchor ugly pretty state@ReaderState{..}
+ = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors }
+
+--
+usedAnchors :: ReaderState -> [Anchor]
+usedAnchors ReaderState{..} = M.elems bookmarkAnchors
+
+getMediaBag :: ReaderState -> MediaBag
+getMediaBag ReaderState{..} = odtMediaBag
+
+getMediaEnv :: ReaderState -> Media
+getMediaEnv ReaderState{..} = envMedia
+
+insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState
+insertMedia' (fp, bs) state@ReaderState{..}
+ = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag }
+
+--------------------------------------------------------------------------------
+-- Reader type and associated tools
+--------------------------------------------------------------------------------
+
+type OdtReader a b = XMLReader ReaderState a b
+
+type OdtReaderSafe a b = XMLReaderSafe ReaderState a b
+
+-- | Extract something from the styles
+fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b
+fromStyles f = keepingTheValue
+ (getExtraState >>^ styleSet)
+ >>% f
+
+--
+getStyleByName :: OdtReader StyleName Style
+getStyleByName = fromStyles lookupStyle >>^ maybeToChoice
+
+--
+findStyleFamily :: OdtReader Style StyleFamily
+findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice
+
+--
+lookupListStyle :: OdtReader StyleName ListStyle
+lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice
+
+--
+switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle)
+switchCurrentListStyle = keepingTheValue getExtraState
+ >>% swapCurrentListStyle
+ >>> first setExtraState
+ >>^ snd
+
+--
+pushStyle :: OdtReaderSafe Style Style
+pushStyle = keepingTheValue (
+ ( keepingTheValue getExtraState
+ >>% pushStyle'
+ )
+ >>> setExtraState
+ )
+ >>^ fst
+
+--
+popStyle :: OdtReaderSafe x x
+popStyle = keepingTheValue (
+ getExtraState
+ >>> arr popStyle'
+ >>> setExtraState
+ )
+ >>^ fst
+
+--
+getCurrentListLevel :: OdtReaderSafe _x ListLevel
+getCurrentListLevel = getExtraState >>^ currentListLevel
+
+--
+updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString)
+updateMediaWithResource = keepingTheValue (
+ (keepingTheValue getExtraState
+ >>% insertMedia'
+ )
+ >>> setExtraState
+ )
+ >>^ fst
+
+lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource = proc target -> do
+ state <- getExtraState -< ()
+ case lookup target (getMediaEnv state) of
+ Just bs -> returnV (target, bs) -<< ()
+ Nothing -> returnV ("", B.empty) -< ()
+
+type AnchorPrefix = String
+
+-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
+-- unique identifier but without assuming that the id should be for a header.
+-- Second argument is a list of already used identifiers.
+uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
+uniqueIdentFrom baseIdent usedIdents =
+ let numIdent n = baseIdent ++ "-" ++ show n
+ in if baseIdent `elem` usedIdents
+ then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
+ Just x -> numIdent x
+ Nothing -> baseIdent -- if we have more than 60,000, allow repeats
+ else baseIdent
+
+-- | First argument: basis for a new "pretty" anchor if none exists yet
+-- Second argument: a key ("ugly" anchor)
+-- Returns: saved "pretty" anchor or created new one
+getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor
+getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
+ state <- getExtraState -< ()
+ case lookupPrettyAnchor uglyAnchor state of
+ Just prettyAnchor -> returnA -< prettyAnchor
+ Nothing -> do
+ let newPretty = uniqueIdentFrom baseIdent (usedAnchors state)
+ modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty
+
+-- | Input: basis for a new header anchor
+-- Ouput: saved new anchor
+getHeaderAnchor :: OdtReaderSafe Inlines Anchor
+getHeaderAnchor = proc title -> do
+ state <- getExtraState -< ()
+ let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
+ modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
+
+
+--------------------------------------------------------------------------------
+-- Working with styles
+--------------------------------------------------------------------------------
+
+--
+readStyleByName :: OdtReader _x (StyleName, Style)
+readStyleByName =
+ findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE
+ where
+ liftE :: (StyleName, Fallible Style) -> Fallible (StyleName, Style)
+ liftE (name, Right v) = Right (name, v)
+ liftE (_, Left v) = Left v
+
+--
+isStyleToTrace :: OdtReader Style Bool
+isStyleToTrace = findStyleFamily >>?^ (==FaText)
+
+--
+withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines
+withNewStyle a = proc x -> do
+ fStyle <- readStyleByName -< ()
+ case fStyle of
+ Right (styleName, _) | isCodeStyle styleName -> do
+ inlines <- a -< x
+ arr inlineCode -<< inlines
+ Right (_, style) -> do
+ mFamily <- arr styleFamily -< style
+ fTextProps <- arr ( maybeToChoice
+ . textProperties
+ . styleProperties
+ ) -< style
+ case fTextProps of
+ Right textProps -> do
+ state <- getExtraState -< ()
+ let triple = (state, textProps, mFamily)
+ modifier <- arr modifierFromStyleDiff -< triple
+ fShouldTrace <- isStyleToTrace -< style
+ case fShouldTrace of
+ Right shouldTrace -> do
+ if shouldTrace
+ then do
+ pushStyle -< style
+ inlines <- a -< x
+ popStyle -< ()
+ arr modifier -<< inlines
+ else
+ -- In case anything goes wrong
+ a -< x
+ Left _ -> a -< x
+ Left _ -> a -< x
+ Left _ -> a -< x
+ where
+ isCodeStyle :: StyleName -> Bool
+ isCodeStyle "Source_Text" = True
+ isCodeStyle _ = False
+
+ inlineCode :: Inlines -> Inlines
+ inlineCode = code . intercalate "" . map stringify . toList
+
+type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
+type InlineModifier = Inlines -> Inlines
+
+-- | Given data about the local style changes, calculates how to modify
+-- an instance of 'Inlines'
+modifierFromStyleDiff :: PropertyTriple -> InlineModifier
+modifierFromStyleDiff propertyTriple =
+ composition $
+ (getVPosModifier propertyTriple)
+ : map (first ($ propertyTriple) >>> ifThen_else ignore)
+ [ (hasEmphChanged , emph )
+ , (hasChanged isStrong , strong )
+ , (hasChanged strikethrough , strikeout )
+ ]
+ where
+ ifThen_else else' (if',then') = if if' then then' else else'
+
+ ignore = id :: InlineModifier
+
+ getVPosModifier :: PropertyTriple -> InlineModifier
+ getVPosModifier triple@(_,textProps,_) =
+ let getVPos = Just . verticalPosition
+ in case lookupPreviousValueM getVPos triple of
+ Nothing -> ignore
+ Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps)
+
+ getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore
+ getVPosModifier' ( _ , VPosSub ) = subscript
+ getVPosModifier' ( _ , VPosSuper ) = superscript
+ getVPosModifier' ( _ , _ ) = ignore
+
+ hasEmphChanged :: PropertyTriple -> Bool
+ hasEmphChanged = swing any [ hasChanged isEmphasised
+ , hasChangedM pitch
+ , hasChanged underline
+ ]
+
+ hasChanged property triple@(_, property -> newProperty, _) =
+ maybe True (/=newProperty) (lookupPreviousValue property triple)
+
+ hasChangedM property triple@(_, textProps,_) =
+ fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple
+
+ lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties)
+
+ lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties)
+
+ lookupPreviousStyleValue f (ReaderState{..},_,mFamily)
+ = ( findBy f $ extendedStylePropertyChain styleTrace styleSet )
+ <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily )
+
+
+type ParaModifier = Blocks -> Blocks
+
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5
+_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5
+
+-- | Returns either 'id' or 'blockQuote' depending on the current indentation
+getParaModifier :: Style -> ParaModifier
+getParaModifier Style{..} | Just props <- paraProperties styleProperties
+ , isBlockQuote (indentation props)
+ (margin_left props)
+ = blockQuote
+ | otherwise
+ = id
+ where
+ isBlockQuote mIndent mMargin
+ | LengthValueMM indent <- mIndent
+ , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+ = True
+ | LengthValueMM margin <- mMargin
+ , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+ = True
+ | LengthValueMM indent <- mIndent
+ , LengthValueMM margin <- mMargin
+ = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_
+
+ | PercentValue indent <- mIndent
+ , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+ = True
+ | PercentValue margin <- mMargin
+ , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+ = True
+ | PercentValue indent <- mIndent
+ , PercentValue margin <- mMargin
+ = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_
+
+ | otherwise
+ = False
+
+--
+constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks
+constructPara reader = proc blocks -> do
+ fStyle <- readStyleByName -< blocks
+ case fStyle of
+ Left _ -> reader -< blocks
+ Right (styleName, _) | isTableCaptionStyle styleName -> do
+ blocks' <- reader -< blocks
+ arr tableCaptionP -< blocks'
+ Right (_, style) -> do
+ let modifier = getParaModifier style
+ blocks' <- reader -< blocks
+ arr modifier -<< blocks'
+ where
+ isTableCaptionStyle :: StyleName -> Bool
+ isTableCaptionStyle "Table" = True
+ isTableCaptionStyle _ = False
+ tableCaptionP b = divWith ("", ["caption"], []) b
+
+type ListConstructor = [Blocks] -> Blocks
+
+getListConstructor :: ListLevelStyle -> ListConstructor
+getListConstructor ListLevelStyle{..} =
+ case listLevelType of
+ LltBullet -> bulletList
+ LltImage -> bulletList
+ LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat
+ listNumberDelim = toListNumberDelim listItemPrefix
+ listItemSuffix
+ in orderedListWith (listItemStart, listNumberStyle, listNumberDelim)
+ where
+ toListNumberStyle LinfNone = DefaultStyle
+ toListNumberStyle LinfNumber = Decimal
+ toListNumberStyle LinfRomanLC = LowerRoman
+ toListNumberStyle LinfRomanUC = UpperRoman
+ toListNumberStyle LinfAlphaLC = LowerAlpha
+ toListNumberStyle LinfAlphaUC = UpperAlpha
+ toListNumberStyle (LinfString _) = Example
+
+ toListNumberDelim Nothing (Just ".") = Period
+ toListNumberDelim (Just "" ) (Just ".") = Period
+ toListNumberDelim Nothing (Just ")") = OneParen
+ toListNumberDelim (Just "" ) (Just ")") = OneParen
+ toListNumberDelim (Just "(") (Just ")") = TwoParens
+ toListNumberDelim _ _ = DefaultDelim
+
+
+-- | Determines which style to use for a list, which level to use of that
+-- style, and which type of list to create as a result of this information.
+-- Then prepares the state for eventual child lists and constructs the list from
+-- the results.
+-- Two main cases are handled: The list may provide its own style or it may
+-- rely on a parent list's style. I the former case the current style in the
+-- state must be switched before and after the call to the child converter
+-- while in the latter the child converter can be called directly.
+-- If anything goes wrong, a default ordered-list-constructor is used.
+constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks
+constructList reader = proc x -> do
+ modifyExtraState (shiftListLevel 1) -< ()
+ listLevel <- getCurrentListLevel -< ()
+ fStyleName <- findAttr NsText "style-name" -< ()
+ case fStyleName of
+ Right styleName -> do
+ fListStyle <- lookupListStyle -< styleName
+ case fListStyle of
+ Right listStyle -> do
+ fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
+ case fLLS of
+ Just listLevelStyle -> do
+ oldListStyle <- switchCurrentListStyle -< Just listStyle
+ blocks <- constructListWith listLevelStyle -<< x
+ switchCurrentListStyle -< oldListStyle
+ returnA -< blocks
+ Nothing -> constructOrderedList -< x
+ Left _ -> constructOrderedList -< x
+ Left _ -> do
+ state <- getExtraState -< ()
+ mListStyle <- arr currentListStyle -< state
+ case mListStyle of
+ Just listStyle -> do
+ fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle)
+ case fLLS of
+ Just listLevelStyle -> constructListWith listLevelStyle -<< x
+ Nothing -> constructOrderedList -< x
+ Nothing -> constructOrderedList -< x
+ where
+ constructOrderedList =
+ reader
+ >>> modifyExtraState (shiftListLevel (-1))
+ >>^ orderedList
+ constructListWith listLevelStyle =
+ reader
+ >>> getListConstructor listLevelStyle
+ ^>> modifyExtraState (shiftListLevel (-1))
+
+--------------------------------------------------------------------------------
+-- Readers
+--------------------------------------------------------------------------------
+
+type ElementMatcher result = (Namespace, ElementName, OdtReader result result)
+
+type InlineMatcher = ElementMatcher Inlines
+
+type BlockMatcher = ElementMatcher Blocks
+
+
+--
+matchingElement :: (Monoid e)
+ => Namespace -> ElementName
+ -> OdtReaderSafe e e
+ -> ElementMatcher e
+matchingElement ns name reader = (ns, name, asResultAccumulator reader)
+ where
+ asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m)
+ asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>)
+
+--
+matchChildContent' :: (Monoid result)
+ => [ElementMatcher result]
+ -> OdtReaderSafe _x result
+matchChildContent' ls = returnV mempty >>> matchContent' ls
+
+--
+matchChildContent :: (Monoid result)
+ => [ElementMatcher result]
+ -> OdtReaderSafe (result, XML.Content) result
+ -> OdtReaderSafe _x result
+matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
+
+
+--------------------------------------------
+-- Matchers
+--------------------------------------------
+
+----------------------
+-- Basics
+----------------------
+
+--
+-- | Open Document allows several consecutive spaces if they are marked up
+read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines
+read_plain_text = fst ^&&& read_plain_text' >>% recover
+ where
+ -- fallible version
+ read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines
+ read_plain_text' = ( second ( arr extractText )
+ >>^ spreadChoice >>?! second text
+ )
+ >>?% (<>)
+ --
+ extractText :: XML.Content -> Fallible String
+ extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+ extractText _ = failEmpty
+
+read_text_seq :: InlineMatcher
+read_text_seq = matchingElement NsText "sequence"
+ $ matchChildContent [] read_plain_text
+
+
+-- specifically. I honor that, although the current implementation of '(<>)'
+-- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein.
+-- The rational is to be prepared for future modifications.
+read_spaces :: InlineMatcher
+read_spaces = matchingElement NsText "s" (
+ readAttrWithDefault NsText "c" 1 -- how many spaces?
+ >>^ fromList.(`replicate` Space)
+ )
+--
+read_line_break :: InlineMatcher
+read_line_break = matchingElement NsText "line-break"
+ $ returnV linebreak
+
+--
+read_span :: InlineMatcher
+read_span = matchingElement NsText "span"
+ $ withNewStyle
+ $ matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text
+
+--
+read_paragraph :: BlockMatcher
+read_paragraph = matchingElement NsText "p"
+ $ constructPara
+ $ liftA para
+ $ withNewStyle
+ $ matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ , read_maybe_nested_img_frame
+ , read_text_seq
+ ] read_plain_text
+
+
+----------------------
+-- Headers
+----------------------
+
+--
+read_header :: BlockMatcher
+read_header = matchingElement NsText "h"
+ $ proc blocks -> do
+ level <- ( readAttrWithDefault NsText "outline-level" 1
+ ) -< blocks
+ children <- ( matchChildContent [ read_span
+ , read_spaces
+ , read_line_break
+ , read_link
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ , read_maybe_nested_img_frame
+ ] read_plain_text
+ ) -< blocks
+ anchor <- getHeaderAnchor -< children
+ let idAttr = (anchor, [], []) -- no classes, no key-value pairs
+ arr (uncurry3 headerWith) -< (idAttr, level, children)
+
+----------------------
+-- Lists
+----------------------
+
+--
+read_list :: BlockMatcher
+read_list = matchingElement NsText "list"
+-- $ withIncreasedListLevel
+ $ constructList
+-- $ liftA bulletList
+ $ matchChildContent' [ read_list_item
+ ]
+--
+read_list_item :: ElementMatcher [Blocks]
+read_list_item = matchingElement NsText "list-item"
+ $ liftA (compactify.(:[]))
+ ( matchChildContent' [ read_paragraph
+ , read_header
+ , read_list
+ ]
+ )
+
+
+----------------------
+-- Links
+----------------------
+
+read_link :: InlineMatcher
+read_link = matchingElement NsText "a"
+ $ liftA3 link
+ ( findAttrWithDefault NsXLink "href" "" )
+ ( findAttrWithDefault NsOffice "title" "" )
+ ( matchChildContent [ read_span
+ , read_note
+ , read_citation
+ , read_bookmark
+ , read_bookmark_start
+ , read_reference_start
+ , read_bookmark_ref
+ , read_reference_ref
+ ] read_plain_text )
+
+
+-------------------------
+-- Footnotes
+-------------------------
+
+read_note :: InlineMatcher
+read_note = matchingElement NsText "note"
+ $ liftA note
+ $ matchChildContent' [ read_note_body ]
+
+read_note_body :: BlockMatcher
+read_note_body = matchingElement NsText "note-body"
+ $ matchChildContent' [ read_paragraph ]
+
+-------------------------
+-- Citations
+-------------------------
+
+read_citation :: InlineMatcher
+read_citation = matchingElement NsText "bibliography-mark"
+ $ liftA2 cite
+ ( liftA2 makeCitation
+ ( findAttrWithDefault NsText "identifier" "" )
+ ( readAttrWithDefault NsText "number" 0 )
+ )
+ ( matchChildContent [] read_plain_text )
+ where
+ makeCitation :: String -> Int -> [Citation]
+ makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
+
+
+----------------------
+-- Tables
+----------------------
+
+--
+read_table :: BlockMatcher
+read_table = matchingElement NsTable "table"
+ $ liftA simpleTable'
+ $ matchChildContent' [ read_table_row
+ ]
+
+-- | A simple table without a caption or headers
+-- | Infers the number of headers from rows
+simpleTable' :: [[Blocks]] -> Blocks
+simpleTable' [] = simpleTable [] []
+simpleTable' (x : rest) = simpleTable (fmap (const defaults) x) (x : rest)
+ where defaults = fromList []
+
+--
+read_table_row :: ElementMatcher [[Blocks]]
+read_table_row = matchingElement NsTable "table-row"
+ $ liftA (:[])
+ $ matchChildContent' [ read_table_cell
+ ]
+
+--
+read_table_cell :: ElementMatcher [Blocks]
+read_table_cell = matchingElement NsTable "table-cell"
+ $ liftA (compactify.(:[]))
+ $ matchChildContent' [ read_paragraph
+ ]
+
+----------------------
+-- Images
+----------------------
+
+--
+read_maybe_nested_img_frame :: InlineMatcher
+read_maybe_nested_img_frame = matchingElement NsDraw "frame"
+ $ proc blocks -> do
+ img <- (findChild' NsDraw "image") -< ()
+ case img of
+ Just _ -> read_frame -< blocks
+ Nothing -> matchChildContent' [ read_frame_text_box ] -< blocks
+
+read_frame :: OdtReaderSafe Inlines Inlines
+read_frame =
+ proc blocks -> do
+ w <- ( findAttr' NsSVG "width" ) -< ()
+ h <- ( findAttr' NsSVG "height" ) -< ()
+ titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks
+ src <- matchChildContent' [ read_image_src ] -< blocks
+ resource <- lookupResource -< src
+ _ <- updateMediaWithResource -< resource
+ alt <- (matchChildContent [] read_plain_text) -< blocks
+ arr (uncurry4 imageWith ) -<
+ (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt)
+
+image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes x y =
+ ( "", [], (dim "width" x) ++ (dim "height" y))
+ where
+ dim _ (Just "") = []
+ dim name (Just v) = [(name, v)]
+ dim _ Nothing = []
+
+read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor)
+read_image_src = matchingElement NsDraw "image"
+ $ proc _ -> do
+ imgSrc <- findAttr NsXLink "href" -< ()
+ case imgSrc of
+ Right src -> returnV src -<< ()
+ Left _ -> returnV "" -< ()
+
+read_frame_title :: InlineMatcher
+read_frame_title = matchingElement NsSVG "title"
+ $ (matchChildContent [] read_plain_text)
+
+read_frame_text_box :: InlineMatcher
+read_frame_text_box = matchingElement NsDraw "text-box"
+ $ proc blocks -> do
+ paragraphs <- (matchChildContent' [ read_paragraph ]) -< blocks
+ arr read_img_with_caption -< toList paragraphs
+
+read_img_with_caption :: [Block] -> Inlines
+read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) =
+ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
+read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) =
+ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
+read_img_with_caption ( (Para (_ : xs)) : ys) =
+ read_img_with_caption ((Para xs) : ys)
+read_img_with_caption _ =
+ mempty
+
+----------------------
+-- Internal links
+----------------------
+
+_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ = "anchor"
+
+--
+readAnchorAttr :: OdtReader _x Anchor
+readAnchorAttr = findAttr NsText "name"
+
+-- | Beware: may fail
+findAnchorName :: OdtReader AnchorPrefix Anchor
+findAnchorName = ( keepingTheValue readAnchorAttr
+ >>^ spreadChoice
+ ) >>?! getPrettyAnchor
+
+
+--
+maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix
+ -> OdtReaderSafe Inlines Inlines
+maybeAddAnchorFrom anchorReader =
+ keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem)
+ >>>
+ proc (inlines, fAnchorElem) -> do
+ case fAnchorElem of
+ Right anchorElem -> returnA -< anchorElem
+ Left _ -> returnA -< inlines
+ where
+ toAnchorElem :: Anchor -> Inlines
+ toAnchorElem anchorID = spanWith (anchorID, [], []) mempty
+ -- no classes, no key-value pairs
+
+--
+read_bookmark :: InlineMatcher
+read_bookmark = matchingElement NsText "bookmark"
+ $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
+
+--
+read_bookmark_start :: InlineMatcher
+read_bookmark_start = matchingElement NsText "bookmark-start"
+ $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_)
+
+--
+read_reference_start :: InlineMatcher
+read_reference_start = matchingElement NsText "reference-mark-start"
+ $ maybeAddAnchorFrom readAnchorAttr
+
+-- | Beware: may fail
+findAnchorRef :: OdtReader _x Anchor
+findAnchorRef = ( findAttr NsText "ref-name"
+ >>?^ (_ANCHOR_PREFIX_,)
+ ) >>?! getPrettyAnchor
+
+
+--
+maybeInAnchorRef :: OdtReaderSafe Inlines Inlines
+maybeInAnchorRef = proc inlines -> do
+ fRef <- findAnchorRef -< ()
+ case fRef of
+ Right anchor ->
+ arr (toAnchorRef anchor) -<< inlines
+ Left _ -> returnA -< inlines
+ where
+ toAnchorRef :: Anchor -> Inlines -> Inlines
+ toAnchorRef anchor = link ('#':anchor) "" -- no title
+
+--
+read_bookmark_ref :: InlineMatcher
+read_bookmark_ref = matchingElement NsText "bookmark-ref"
+ $ maybeInAnchorRef
+ <<< matchChildContent [] read_plain_text
+
+--
+read_reference_ref :: InlineMatcher
+read_reference_ref = matchingElement NsText "reference-ref"
+ $ maybeInAnchorRef
+ <<< matchChildContent [] read_plain_text
+
+
+----------------------
+-- Entry point
+----------------------
+
+--read_plain_content :: OdtReaderSafe _x Inlines
+--read_plain_content = strContent >>^ text
+
+read_text :: OdtReaderSafe _x Pandoc
+read_text = matchChildContent' [ read_header
+ , read_paragraph
+ , read_list
+ , read_table
+ ]
+ >>^ doc
+
+post_process :: Pandoc -> Pandoc
+post_process (Pandoc m blocks) =
+ Pandoc m (post_process' blocks)
+
+post_process' :: [Block] -> [Block]
+post_process' ((Table _ a w h r) : (Div ("", ["caption"], _) [Para inlines] ) : xs) =
+ (Table inlines a w h r) : ( post_process' xs )
+post_process' bs = bs
+
+read_body :: OdtReader _x (Pandoc, MediaBag)
+read_body = executeIn NsOffice "body"
+ $ executeIn NsOffice "text"
+ $ liftAsSuccess
+ $ proc inlines -> do
+ txt <- read_text -< inlines
+ state <- getExtraState -< ()
+ returnA -< (post_process txt, getMediaBag state)
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
new file mode 100644
index 000000000..877443543
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -0,0 +1,260 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.Fallible
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Data types and utilities representing failure. Most of it is based on the
+"Either" type in its usual configuration (left represents failure).
+
+In most cases, the failure type is implied or required to be a "Monoid".
+
+The choice of "Either" instead of a custom type makes it easier to write
+compatible instances of "ArrowChoice".
+-}
+
+-- We export everything
+module Text.Pandoc.Readers.Odt.Generic.Fallible where
+
+import Control.Applicative
+import Control.Monad
+
+import qualified Data.Foldable as F
+import Data.Monoid ((<>))
+
+-- | Default for now. Will probably become a class at some point.
+type Failure = ()
+
+type Fallible a = Either Failure a
+
+
+-- | False -> Left (), True -> Right ()
+boolToEither :: Bool -> Fallible ()
+boolToEither False = Left ()
+boolToEither True = Right ()
+
+-- | False -> Left (), True -> Right ()
+boolToChoice :: Bool -> Fallible ()
+boolToChoice False = Left ()
+boolToChoice True = Right ()
+
+--
+maybeToEither :: Maybe a -> Fallible a
+maybeToEither (Just a) = Right a
+maybeToEither Nothing = Left ()
+
+--
+eitherToMaybe :: Either _l a -> Maybe a
+eitherToMaybe (Left _) = Nothing
+eitherToMaybe (Right a) = Just a
+
+-- | > untagEither === either id id
+untagEither :: Either a a -> a
+untagEither (Left a) = a
+untagEither (Right a) = a
+
+-- | > fromLeft f === either f id
+fromLeft :: (a -> b) -> Either a b -> b
+fromLeft f (Left a) = f a
+fromLeft _ (Right b) = b
+
+-- | > fromRight f === either id f
+fromRight :: (a -> b) -> Either b a -> b
+fromRight _ (Left b) = b
+fromRight f (Right a) = f a
+
+-- | > recover a === fromLeft (const a) === either (const a) id
+recover :: a -> Either _f a -> a
+recover a (Left _) = a
+recover _ (Right a) = a
+
+-- | I would love to use 'fail'. Alas, 'Monad.fail'...
+failWith :: failure -> Either failure _x
+failWith f = Left f
+
+--
+failEmpty :: (Monoid failure) => Either failure _x
+failEmpty = failWith mempty
+
+--
+succeedWith :: a -> Either _x a
+succeedWith = Right
+
+--
+collapseEither :: Either failure (Either failure x)
+ -> Either failure x
+collapseEither (Left f ) = Left f
+collapseEither (Right (Left f)) = Left f
+collapseEither (Right (Right x)) = Right x
+
+-- | If either of the values represents an error, the result is a
+-- (possibly combined) error. If both values represent a success,
+-- both are returned.
+chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b')
+chooseMin = chooseMinWith (,)
+
+-- | If either of the values represents an error, the result is a
+-- (possibly combined) error. If both values represent a success,
+-- a combination is returned.
+chooseMinWith :: (Monoid a) => (b -> b' -> c)
+ -> Either a b
+ -> Either a b'
+ -> Either a c
+chooseMinWith (><) (Right a) (Right b) = Right $ a >< b
+chooseMinWith _ (Left a) (Left b) = Left $ a <> b
+chooseMinWith _ (Left a) _ = Left a
+chooseMinWith _ _ (Left b) = Left b
+
+-- | If either of the values represents a non-error, the result is a
+-- (possibly combined) non-error. If both values represent an error, an error
+-- is returned.
+chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b
+chooseMax = chooseMaxWith (<>)
+
+-- | If either of the values represents a non-error, the result is a
+-- (possibly combined) non-error. If both values represent an error, an error
+-- is returned.
+chooseMaxWith :: (Monoid a) => (b -> b -> b)
+ -> Either a b
+ -> Either a b
+ -> Either a b
+chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b
+chooseMaxWith _ (Left a) (Left b) = Left $ a <> b
+chooseMaxWith _ (Right a) _ = Right a
+chooseMaxWith _ _ (Right b) = Right b
+
+
+-- | Class of containers that can escalate contained 'Either's.
+-- The word "Vector" is meant in the sense of a disease transmitter.
+class ChoiceVector v where
+ spreadChoice :: v (Either f a) -> Either f (v a)
+
+-- Let's do a few examples first
+
+instance ChoiceVector Maybe where
+ spreadChoice (Just (Left f)) = Left f
+ spreadChoice (Just (Right x)) = Right (Just x)
+ spreadChoice Nothing = Right Nothing
+
+instance ChoiceVector (Either l) where
+ spreadChoice (Right (Left f)) = Left f
+ spreadChoice (Right (Right x)) = Right (Right x)
+ spreadChoice (Left x ) = Right (Left x)
+
+instance ChoiceVector ((,) a) where
+ spreadChoice (_, Left f) = Left f
+ spreadChoice (x, Right y) = Right (x,y)
+ -- Wasn't there a newtype somewhere with the elements flipped?
+
+--
+-- More instances later, first some discussion.
+--
+-- I'll have to freshen up on type system details to see how (or if) to do
+-- something like
+--
+-- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where
+-- > :
+--
+-- But maybe it would be even better to use something like
+--
+-- > class ChoiceVector v v' f | v -> v' f where
+-- > spreadChoice :: v -> Either f v'
+--
+-- That way, more places in @v@ could spread the cheer, e.g.:
+--
+-- As before:
+-- -- ( a , Either f b) (a , b) f
+-- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where
+-- > spreadChoice (_, Left f) = Left f
+-- > spreadChoice (a, Right b) = Right (a,b)
+--
+-- But also:
+-- -- ( Either f a , b) (a , b) f
+-- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where
+-- > spreadChoice (Right a,b) = Right (a,b)
+-- > spreadChoice (Left f,_) = Left f
+--
+-- And maybe even:
+-- -- ( Either f a , Either f b) (a , b) f
+-- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where
+-- > spreadChoice (Right a , Right b) = Right (a,b)
+-- > spreadChoice (Left f , _ ) = Left f
+-- > spreadChoice ( _ , Left f) = Left f
+--
+-- Of course that would lead to a lot of overlapping instances...
+-- But I can't think of a different way. A selector function might help,
+-- but not even a "Data.Traversable" is powerful enough for that.
+-- But maybe someone has already solved all this with a lens library.
+--
+-- Well, it's an interesting academic question. But for practical purposes,
+-- I have more than enough right now.
+
+instance ChoiceVector ((,,) a b) where
+ spreadChoice (_,_, Left f) = Left f
+ spreadChoice (a,b, Right x) = Right (a,b,x)
+
+instance ChoiceVector ((,,,) a b c) where
+ spreadChoice (_,_,_, Left f) = Left f
+ spreadChoice (a,b,c, Right x) = Right (a,b,c,x)
+
+instance ChoiceVector ((,,,,) a b c d) where
+ spreadChoice (_,_,_,_, Left f) = Left f
+ spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x)
+
+instance ChoiceVector (Const a) where
+ spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types
+
+-- | Fails on the first error
+instance ChoiceVector [] where
+ spreadChoice = sequence -- using the monad instance of Either.
+ -- Could be generalized to "Data.Traversable" - but why play
+ -- with UndecidableInstances unless this is really needed.
+
+-- | Wrapper for a list. While the normal list instance of 'ChoiceVector'
+-- fails whenever it can, this type will never fail.
+newtype SuccessList a = SuccessList { collectNonFailing :: [a] }
+ deriving ( Eq, Ord, Show )
+
+instance ChoiceVector SuccessList where
+ spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
+ where unTagRight (Right x) = (x:)
+ unTagRight _ = id
+
+-- | Like 'catMaybes', but for 'Either'.
+collectRights :: [Either _l r] -> [r]
+collectRights = collectNonFailing . untag . spreadChoice . SuccessList
+ where untag = fromLeft (error "Unexpected Left")
+
+-- | A version of 'collectRights' generalized to other containers. The
+-- container must be both "reducible" and "buildable". Most general containers
+-- should fullfill these requirements, but there is no single typeclass
+-- (that I know of) for that.
+-- Therefore, they are split between 'Foldable' and 'MonadPlus'.
+-- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.)
+collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r
+collectRightsF = F.foldr unTagRight mzero
+ where unTagRight (Right x) = mplus $ return x
+ unTagRight _ = id
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
new file mode 100644
index 000000000..82ae3e20e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -0,0 +1,62 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.Namespaces
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A class containing a set of namespace identifiers. Used to convert between
+typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.Namespaces where
+
+import qualified Data.Map as M
+
+--
+type NameSpaceIRI = String
+
+--
+type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
+
+--
+class (Eq nsID, Ord nsID) => NameSpaceID nsID where
+
+ -- | Given a IRI, possibly update the map and return the id of the namespace.
+ -- May fail if the namespace is unknown and the application does not
+ -- allow unknown namespaces.
+ getNamespaceID :: NameSpaceIRI
+ -> NameSpaceIRIs nsID
+ -> Maybe (NameSpaceIRIs nsID, nsID)
+ -- | Given a namespace id, lookup its IRI. May be overriden for performance.
+ getIRI :: nsID
+ -> NameSpaceIRIs nsID
+ -> Maybe NameSpaceIRI
+ -- | The root element of an XML document has a namespace, too, and the
+ -- "XML.Light-parser" is eager to remove the corresponding namespace
+ -- attribute.
+ -- As a result, at least this root namespace must be provided.
+ getInitialIRImap :: NameSpaceIRIs nsID
+
+ getIRI = M.lookup
+ getInitialIRImap = M.empty
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
new file mode 100644
index 000000000..afd7d616c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs
@@ -0,0 +1,48 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.SetMap
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A map of values to sets of values.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.SetMap where
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+type SetMap k v = M.Map k (S.Set v)
+
+empty :: SetMap k v
+empty = M.empty
+
+fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v
+fromList = foldr (uncurry insert) empty
+
+insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v
+insert key value setMap = M.insertWith S.union key (S.singleton value) setMap
+
+union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v
+union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
new file mode 100644
index 000000000..6c10ed61d
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -0,0 +1,171 @@
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt.Generic.Utils
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+General utility functions for the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.Utils
+( uncurry3
+, uncurry4
+, uncurry5
+, uncurry6
+, uncurry7
+, uncurry8
+, swap
+, reverseComposition
+, bool
+, tryToRead
+, Lookupable(..)
+, readLookupables
+, readLookupable
+, readPercent
+, findBy
+, swing
+, composition
+) where
+
+import Control.Category ( Category, (>>>), (<<<) )
+import qualified Control.Category as Cat ( id )
+import Control.Monad ( msum )
+
+import qualified Data.Foldable as F ( Foldable, foldr )
+import Data.Maybe
+
+
+-- | Aequivalent to
+-- > foldr (.) id
+-- where '(.)' are 'id' are the ones from "Control.Category"
+-- and 'foldr' is the one from "Data.Foldable".
+-- The noun-form was chosen to be consistend with 'sum', 'product' etc
+-- based on the discussion at
+-- <https://groups.google.com/forum/#!topic/haskell-cafe/VkOZM1zaHOI>
+-- (that I was not part of)
+composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
+composition = F.foldr (<<<) Cat.id
+
+-- | Aequivalent to
+-- > foldr (flip (.)) id
+-- where '(.)' are 'id' are the ones from "Control.Category"
+-- and 'foldr' is the one from "Data.Foldable".
+-- A reversed version of 'composition'.
+reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
+reverseComposition = F.foldr (>>>) Cat.id
+
+-- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'.
+-- Note that the first value is selected if the boolean value is 'False'.
+-- That makes 'bool' consistent with the other two. Also, 'bool' now takes its
+-- arguments in the exact opposite order compared to the normal if construct.
+bool :: a -> a -> Bool -> a
+bool x _ False = x
+bool _ x True = x
+
+-- | This function often makes it possible to switch values with the functions
+-- that are applied to them.
+--
+-- Examples:
+-- > swing map :: [a -> b] -> a -> [b]
+-- > swing any :: [a -> Bool] -> a -> Bool
+-- > swing foldr :: b -> a -> [a -> b -> b] -> b
+-- > swing scanr :: c -> a -> [a -> c -> c] -> c
+-- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c]
+-- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool)
+--
+-- Stolen from <https://wiki.haskell.org/Pointfree>
+swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d
+swing = flip.(.flip id)
+-- swing f c a = f ($ a) c
+
+
+-- | Alternative to 'read'/'reads'. The former of these throws errors
+-- (nobody wants that) while the latter returns "to much" for simple purposes.
+-- This function instead applies 'reads' and returns the first match (if any)
+-- in a 'Maybe'.
+tryToRead :: (Read r) => String -> Maybe r
+tryToRead = reads >>> listToMaybe >>> fmap fst
+
+-- | A version of 'reads' that requires a '%' sign after the number
+readPercent :: ReadS Int
+readPercent s = [ (i,s') | (i , r ) <- reads s
+ , ("%" , s') <- lex r
+ ]
+
+-- | Data that can be looked up.
+-- This is mostly a utility to read data with kind *.
+class Lookupable a where
+ lookupTable :: [(String, a)]
+
+-- | The idea is to use this function as if there was a declaration like
+--
+-- > instance (Lookupable a) => (Read a) where
+-- > readsPrec _ = readLookupables
+-- .
+-- But including this code in this form would need UndecideableInstances.
+-- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
+-- can be used directly in almost any case.
+readLookupables :: (Lookupable a) => String -> [(a,String)]
+readLookupables s = [ (a,rest) | (word,rest) <- lex s,
+ let result = lookup word lookupTable,
+ isJust result,
+ let Just a = result
+ ]
+
+-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
+readLookupable :: (Lookupable a) => String -> Maybe a
+readLookupable s = msum
+ $ map ((`lookup` lookupTable).fst)
+ $ lex s
+
+uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
+uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
+uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z
+uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z
+uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z
+uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z
+
+uncurry3 fun (a,b,c ) = fun a b c
+uncurry4 fun (a,b,c,d ) = fun a b c d
+uncurry5 fun (a,b,c,d,e ) = fun a b c d e
+uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f
+uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g
+uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h
+
+swap :: (a,b) -> (b,a)
+swap (a,b) = (b,a)
+
+-- | A version of "Data.List.find" that uses a converter to a Maybe instance.
+-- The returned value is the first which the converter returns in a 'Just'
+-- wrapper.
+findBy :: (a -> Maybe b) -> [a] -> Maybe b
+findBy _ [] = Nothing
+findBy f ((f -> Just x):_ ) = Just x
+findBy f ( _:xs) = findBy f xs
+
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
new file mode 100644
index 000000000..8c03d1a09
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -0,0 +1,1063 @@
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RecordWildCards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+A generalized XML parser based on stateful arrows.
+It might be sufficient to define this reader as a comonad, but there is
+not a lot of use in trying.
+-}
+
+module Text.Pandoc.Readers.Odt.Generic.XMLConverter
+( ElementName
+, XMLConverterState
+, XMLConverter
+, FallibleXMLConverter
+, swapPosition
+, runConverter
+, runConverter''
+, runConverter'
+, runConverterF'
+, runConverterF
+, getCurrentElement
+, getExtraState
+, setExtraState
+, modifyExtraState
+, convertingExtraState
+, producingExtraState
+, lookupNSiri
+, lookupNSprefix
+, readNSattributes
+, elemName
+, elemNameIs
+, strContent
+, elContent
+, currentElem
+, currentElemIs
+, expectElement
+, elChildren
+, findChildren
+, filterChildren
+, filterChildrenName
+, findChild'
+, findChild
+, filterChild'
+, filterChild
+, filterChildName'
+, filterChildName
+, isSet
+, isSet'
+, isSetWithDefault
+, hasAttrValueOf'
+, failIfNotAttrValueOf
+, isThatTheAttrValue
+, searchAttrIn
+, searchAttrWith
+, searchAttr
+, lookupAttr
+, lookupAttr'
+, lookupAttrWithDefault
+, lookupDefaultingAttr
+, findAttr'
+, findAttr
+, findAttrWithDefault
+, readAttr
+, readAttr'
+, readAttrWithDefault
+, getAttr
+-- , (>/<)
+-- , (?>/<)
+, executeIn
+, collectEvery
+, withEveryL
+, withEvery
+, tryAll
+, tryAll'
+, IdXMLConverter
+, MaybeEConverter
+, ElementMatchConverter
+, MaybeCConverter
+, ContentMatchConverter
+, makeMatcherE
+, makeMatcherC
+, prepareMatchersE
+, prepareMatchersC
+, matchChildren
+, matchContent''
+, matchContent'
+, matchContent
+) where
+
+import Control.Applicative hiding ( liftA, liftA2 )
+import Control.Monad ( MonadPlus )
+import Control.Arrow
+
+import qualified Data.Map as M
+import qualified Data.Foldable as F
+import Data.Default
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Readers.Odt.Arrows.State
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+
+import Text.Pandoc.Readers.Odt.Generic.Namespaces
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+
+--------------------------------------------------------------------------------
+-- Basis types for readability
+--------------------------------------------------------------------------------
+
+--
+type ElementName = String
+type AttributeName = String
+type AttributeValue = String
+
+--
+type NameSpacePrefix = String
+
+--
+type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
+
+--------------------------------------------------------------------------------
+-- Main converter state
+--------------------------------------------------------------------------------
+
+-- GADT so some of the NameSpaceID restrictions can be deduced
+data XMLConverterState nsID extraState where
+ XMLConverterState :: NameSpaceID nsID
+ => { -- | A stack of parent elements. The top element is the current one.
+ -- Arguably, a real Zipper would be better. But that is an
+ -- optimization that can be made at a later time, e.g. when
+ -- replacing Text.XML.Light.
+ parentElements :: [XML.Element]
+ -- | A map from internal namespace IDs to the namespace prefixes
+ -- used in XML elements
+ , namespacePrefixes :: NameSpacePrefixes nsID
+ -- | A map from internal namespace IDs to namespace IRIs
+ -- (Only necessary for matching namespace IDs and prefixes)
+ , namespaceIRIs :: NameSpaceIRIs nsID
+ -- | A place to put "something else". This feature is used heavily
+ -- to keep the main code cleaner. More specifically, the main reader
+ -- is divided into different stages. Each stage lifts something up
+ -- here, which the next stage can then use. This could of course be
+ -- generalized to a state-tree or used for the namespace IRIs. The
+ -- border between states and values is an imaginary one, after all.
+ -- But the separation as it is seems to be enough for now.
+ , moreState :: extraState
+ }
+ -> XMLConverterState nsID extraState
+
+--
+createStartState :: (NameSpaceID nsID)
+ => XML.Element
+ -> extraState
+ -> XMLConverterState nsID extraState
+createStartState element extraState =
+ XMLConverterState
+ { parentElements = [element]
+ , namespacePrefixes = M.empty
+ , namespaceIRIs = getInitialIRImap
+ , moreState = extraState
+ }
+
+-- | Functor over extra state
+instance Functor (XMLConverterState nsID) where
+ fmap f ( XMLConverterState parents prefixes iRIs extraState )
+ = XMLConverterState parents prefixes iRIs (f extraState)
+
+--
+replaceExtraState :: extraState
+ -> XMLConverterState nsID _x
+ -> XMLConverterState nsID extraState
+replaceExtraState x s
+ = fmap (const x) s
+
+--
+currentElement :: XMLConverterState nsID extraState
+ -> XML.Element
+currentElement state = head (parentElements state)
+
+-- | Replace the current position by another, modifying the extra state
+-- in the process
+swapPosition :: (extraState -> extraState')
+ -> [XML.Element]
+ -> XMLConverterState nsID extraState
+ -> XMLConverterState nsID extraState'
+swapPosition f stack state
+ = state { parentElements = stack
+ , moreState = f (moreState state)
+ }
+
+-- | Replace the current position by another, modifying the extra state
+-- in the process
+swapStack' :: XMLConverterState nsID extraState
+ -> [XML.Element]
+ -> ( XMLConverterState nsID extraState , [XML.Element] )
+swapStack' state stack
+ = ( state { parentElements = stack }
+ , parentElements state
+ )
+
+--
+pushElement :: XML.Element
+ -> XMLConverterState nsID extraState
+ -> XMLConverterState nsID extraState
+pushElement e state = state { parentElements = e:(parentElements state) }
+
+-- | Pop the top element from the call stack, unless it is the last one.
+popElement :: XMLConverterState nsID extraState
+ -> Maybe (XMLConverterState nsID extraState)
+popElement state
+ | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es }
+ | otherwise = Nothing
+
+--------------------------------------------------------------------------------
+-- Main type
+--------------------------------------------------------------------------------
+
+-- It might be a good idea to pack the converters in a GADT
+-- Downside: data instead of type
+-- Upside: 'Failure' could be made a parameter as well.
+
+--
+type XMLConverter nsID extraState input output
+ = ArrowState (XMLConverterState nsID extraState ) input output
+
+type FallibleXMLConverter nsID extraState input output
+ = XMLConverter nsID extraState input (Fallible output)
+
+--
+runConverter :: XMLConverter nsID extraState input output
+ -> XMLConverterState nsID extraState
+ -> input
+ -> output
+runConverter converter state input = snd $ runArrowState converter (state,input)
+
+--
+runConverter'' :: (NameSpaceID nsID)
+ => XMLConverter nsID extraState (Fallible ()) output
+ -> extraState
+ -> XML.Element
+ -> output
+runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) ()
+
+runConverter' :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState () success
+ -> extraState
+ -> XML.Element
+ -> Fallible success
+runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) ()
+
+--
+runConverterF' :: FallibleXMLConverter nsID extraState x y
+ -> XMLConverterState nsID extraState
+ -> Fallible x -> Fallible y
+runConverterF' a s e = runConverter (returnV e >>? a) s e
+
+--
+runConverterF :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState XML.Element x
+ -> extraState
+ -> Fallible XML.Element -> Fallible x
+runConverterF a s = either failWith
+ (\e -> runConverter a (createStartState e s) e)
+
+--
+getCurrentElement :: XMLConverter nsID extraState x XML.Element
+getCurrentElement = extractFromState currentElement
+
+--
+getExtraState :: XMLConverter nsID extraState x extraState
+getExtraState = extractFromState moreState
+
+--
+setExtraState :: XMLConverter nsID extraState extraState extraState
+setExtraState = withState $ \state extra
+ -> (replaceExtraState extra state , extra)
+
+
+-- | Lifts a function to the extra state.
+modifyExtraState :: (extraState -> extraState)
+ -> XMLConverter nsID extraState x x
+modifyExtraState = modifyState.fmap
+
+
+-- | First sets the extra state to the new value. Then modifies the original
+-- extra state with a converter that uses the new state. Finally, the
+-- intermediate state is dropped and the extra state is lifted into the
+-- state as it was at the beginning of the function.
+-- As a result, exactly the extra state and nothing else is changed.
+-- The resulting converter even behaves like an identity converter on the
+-- value level.
+--
+-- (The -ing form is meant to be mnemonic in a sequence of arrows as in
+-- convertingExtraState () converter >>> doOtherStuff)
+--
+convertingExtraState :: extraState'
+ -> FallibleXMLConverter nsID extraState' extraState extraState
+ -> FallibleXMLConverter nsID extraState x x
+convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
+ where
+ setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v
+ modifyWithA = keepingTheValue (moreState ^>> a)
+ >>^ spreadChoice >>?% flip replaceExtraState
+
+-- | First sets the extra state to the new value. Then produces a new
+-- extra state with a converter that uses the new state. Finally, the
+-- intermediate state is dropped and the extra state is lifted into the
+-- state as it was at the beginning of the function.
+-- As a result, exactly the extra state and nothing else is changed.
+-- The resulting converter even behaves like an identity converter on the
+-- value level.
+--
+-- Aequivalent to
+--
+-- > \v x a -> convertingExtraState v (returnV x >>> a)
+--
+-- (The -ing form is meant to be mnemonic in a sequence of arrows as in
+-- producingExtraState () () producer >>> doOtherStuff)
+--
+producingExtraState :: extraState'
+ -> a
+ -> FallibleXMLConverter nsID extraState' a extraState
+ -> FallibleXMLConverter nsID extraState x x
+producingExtraState v x a = convertingExtraState v (returnV x >>> a)
+
+
+--------------------------------------------------------------------------------
+-- Work in namespaces
+--------------------------------------------------------------------------------
+
+-- | Arrow version of 'getIRI'
+lookupNSiri :: (NameSpaceID nsID)
+ => nsID
+ -> XMLConverter nsID extraState x (Maybe NameSpaceIRI)
+lookupNSiri nsID = extractFromState
+ $ \state -> getIRI nsID $ namespaceIRIs state
+
+--
+lookupNSprefix :: (NameSpaceID nsID)
+ => nsID
+ -> XMLConverter nsID extraState x (Maybe NameSpacePrefix)
+lookupNSprefix nsID = extractFromState
+ $ \state -> M.lookup nsID $ namespacePrefixes state
+
+-- | Extracts namespace attributes from the current element and tries to
+-- update the current mapping accordingly
+readNSattributes :: (NameSpaceID nsID)
+ => FallibleXMLConverter nsID extraState x ()
+readNSattributes = fromState $ \state -> maybe (state, failEmpty )
+ ( , succeedWith ())
+ (extractNSAttrs state )
+ where
+ extractNSAttrs :: (NameSpaceID nsID)
+ => XMLConverterState nsID extraState
+ -> Maybe (XMLConverterState nsID extraState)
+ extractNSAttrs startState
+ = foldl (\state d -> state >>= addNS d)
+ (Just startState)
+ nsAttribs
+ where nsAttribs = mapMaybe readNSattr (XML.elAttribs element)
+ element = currentElement startState
+ readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri)
+ = Just (name, iri)
+ readNSattr _ = Nothing
+ addNS (prefix, iri) state = fmap updateState
+ $ getNamespaceID iri
+ $ namespaceIRIs state
+ where updateState (iris,nsID)
+ = state { namespaceIRIs = iris
+ , namespacePrefixes = M.insert nsID prefix
+ $ namespacePrefixes state
+ }
+
+--------------------------------------------------------------------------------
+-- Common namespace accessors
+--------------------------------------------------------------------------------
+
+-- | Given a namespace id and an element name, creates a 'XML.QName' for
+-- internal use
+elemName :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x XML.QName
+elemName nsID name = lookupNSiri nsID
+ &&& lookupNSprefix nsID
+ >>% XML.QName name
+
+-- | Checks if a given element matches both a specified namespace id
+-- and a specified element name
+elemNameIs :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState XML.Element Bool
+elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName
+ where hasThatName e iri = let elName = XML.elName e
+ in XML.qName elName == name
+ && XML.qURI elName == iri
+
+--------------------------------------------------------------------------------
+-- General content
+--------------------------------------------------------------------------------
+
+--
+strContent :: XMLConverter nsID extraState x String
+strContent = getCurrentElement
+ >>^ XML.strContent
+
+--
+elContent :: XMLConverter nsID extraState x [XML.Content]
+elContent = getCurrentElement
+ >>^ XML.elContent
+
+--------------------------------------------------------------------------------
+-- Current element
+--------------------------------------------------------------------------------
+
+--
+currentElem :: XMLConverter nsID extraState x (XML.QName)
+currentElem = getCurrentElement
+ >>^ XML.elName
+
+currentElemIs :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x Bool
+currentElemIs nsID name = getCurrentElement
+ >>> elemNameIs nsID name
+
+
+
+{-
+currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>>
+ (XML.qName >>^ (&&).(== name) )
+ ^&&&^
+ (XML.qIRI >>^ (==) )
+ ) >>% (.)
+ ) &&& lookupNSiri nsID >>% ($)
+-}
+
+--
+expectElement :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState x ()
+expectElement nsID name = currentElemIs nsID name
+ >>^ boolToChoice
+
+--------------------------------------------------------------------------------
+-- Chilren
+--------------------------------------------------------------------------------
+
+--
+elChildren :: XMLConverter nsID extraState x [XML.Element]
+elChildren = getCurrentElement
+ >>^ XML.elChildren
+
+--
+findChildren :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState x [XML.Element]
+findChildren nsID name = elemName nsID name
+ &&& getCurrentElement
+ >>% XML.findChildren
+
+--
+filterChildren :: (XML.Element -> Bool)
+ -> XMLConverter nsID extraState x [XML.Element]
+filterChildren p = getCurrentElement
+ >>^ XML.filterChildren p
+
+--
+filterChildrenName :: (XML.QName -> Bool)
+ -> XMLConverter nsID extraState x [XML.Element]
+filterChildrenName p = getCurrentElement
+ >>^ XML.filterChildrenName p
+
+--
+findChild' :: (NameSpaceID nsID)
+ => nsID
+ -> ElementName
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+findChild' nsID name = elemName nsID name
+ &&& getCurrentElement
+ >>% XML.findChild
+
+--
+findChild :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState x XML.Element
+findChild nsID name = findChild' nsID name
+ >>> maybeToChoice
+
+--
+filterChild' :: (XML.Element -> Bool)
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+filterChild' p = getCurrentElement
+ >>^ XML.filterChild p
+
+--
+filterChild :: (XML.Element -> Bool)
+ -> FallibleXMLConverter nsID extraState x XML.Element
+filterChild p = filterChild' p
+ >>> maybeToChoice
+
+--
+filterChildName' :: (XML.QName -> Bool)
+ -> XMLConverter nsID extraState x (Maybe XML.Element)
+filterChildName' p = getCurrentElement
+ >>^ XML.filterChildName p
+
+--
+filterChildName :: (XML.QName -> Bool)
+ -> FallibleXMLConverter nsID extraState x XML.Element
+filterChildName p = filterChildName' p
+ >>> maybeToChoice
+
+
+--------------------------------------------------------------------------------
+-- Attributes
+--------------------------------------------------------------------------------
+
+--
+isSet :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> (Either Failure Bool)
+ -> FallibleXMLConverter nsID extraState x Bool
+isSet nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ maybe deflt stringToBool
+
+--
+isSet' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe Bool)
+isSet' nsID attrName = findAttr' nsID attrName
+ >>^ (>>= stringToBool')
+
+isSetWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> Bool
+ -> XMLConverter nsID extraState x Bool
+isSetWithDefault nsID attrName def'
+ = isSet' nsID attrName
+ >>^ fromMaybe def'
+
+--
+hasAttrValueOf' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> XMLConverter nsID extraState x Bool
+hasAttrValueOf' nsID attrName attrValue
+ = findAttr nsID attrName
+ >>> ( const False ^|||^ (==attrValue))
+
+--
+failIfNotAttrValueOf :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> FallibleXMLConverter nsID extraState x ()
+failIfNotAttrValueOf nsID attrName attrValue
+ = hasAttrValueOf' nsID attrName attrValue
+ >>^ boolToChoice
+
+-- | Is the value that is currently transported in the arrow the value of
+-- the specified attribute?
+isThatTheAttrValue :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState AttributeValue Bool
+isThatTheAttrValue nsID attrName
+ = keepingTheValue
+ (findAttr nsID attrName)
+ >>% right.(==)
+
+-- | Lookup value in a dictionary, fail if no attribute found or value
+-- not in dictionary
+searchAttrIn :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> [(AttributeValue,a)]
+ -> FallibleXMLConverter nsID extraState x a
+searchAttrIn nsID attrName dict
+ = findAttr nsID attrName
+ >>?^? maybeToChoice.(`lookup` dict )
+
+
+-- | Lookup value in a dictionary. Fail if no attribute found. If value not in
+-- dictionary, return default value
+searchAttrWith :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> a
+ -> [(AttributeValue,a)]
+ -> FallibleXMLConverter nsID extraState x a
+searchAttrWith nsID attrName defV dict
+ = findAttr nsID attrName
+ >>?^ (fromMaybe defV).(`lookup` dict )
+
+-- | Lookup value in a dictionary. If attribute or value not found,
+-- return default value
+searchAttr :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> a
+ -> [(AttributeValue,a)]
+ -> XMLConverter nsID extraState x a
+searchAttr nsID attrName defV dict
+ = searchAttrIn nsID attrName dict
+ >>> const defV ^|||^ id
+
+-- | Read a 'Lookupable' attribute. Fail if no match.
+lookupAttr :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x a
+lookupAttr nsID attrName = lookupAttr' nsID attrName
+ >>^ maybeToChoice
+
+
+-- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'.
+lookupAttr' :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe a)
+lookupAttr' nsID attrName
+ = findAttr' nsID attrName
+ >>^ (>>= readLookupable)
+
+-- | Read a 'Lookupable' attribute with explicit default
+lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a)
+ => nsID -> AttributeName
+ -> a
+ -> XMLConverter nsID extraState x a
+lookupAttrWithDefault nsID attrName deflt
+ = lookupAttr' nsID attrName
+ >>^ fromMaybe deflt
+
+-- | Read a 'Lookupable' attribute with implicit default
+lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x a
+lookupDefaultingAttr nsID attrName
+ = lookupAttrWithDefault nsID attrName def
+
+-- | Return value as a (Maybe String)
+findAttr' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe AttributeValue)
+findAttr' nsID attrName = elemName nsID attrName
+ &&& getCurrentElement
+ >>% XML.findAttr
+
+-- | Return value as string or fail
+findAttr :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x AttributeValue
+findAttr nsID attrName = findAttr' nsID attrName
+ >>> maybeToChoice
+
+-- | Return value as string or return provided default value
+findAttrWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> AttributeValue
+ -> XMLConverter nsID extraState x AttributeValue
+findAttrWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ fromMaybe deflt
+
+-- | Read and return value or fail
+readAttr :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x attrValue
+readAttr nsID attrName = readAttr' nsID attrName
+ >>> maybeToChoice
+
+-- | Read and return value or return Nothing
+readAttr' :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe attrValue)
+readAttr' nsID attrName = findAttr' nsID attrName
+ >>^ (>>= tryToRead)
+
+-- | Read and return value or return provided default value
+readAttrWithDefault :: (NameSpaceID nsID, Read attrValue)
+ => nsID -> AttributeName
+ -> attrValue
+ -> XMLConverter nsID extraState x attrValue
+readAttrWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ (>>= tryToRead)
+ >>^ fromMaybe deflt
+
+-- | Read and return value or return default value from 'Default' instance
+getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x attrValue
+getAttr nsID attrName = readAttrWithDefault nsID attrName def
+
+--------------------------------------------------------------------------------
+-- Movements
+--------------------------------------------------------------------------------
+
+--
+jumpThere :: XMLConverter nsID extraState XML.Element XML.Element
+jumpThere = withState (\state element
+ -> ( pushElement element state , element )
+ )
+
+--
+swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element]
+swapStack = withState swapStack'
+
+--
+jumpBack :: FallibleXMLConverter nsID extraState _x _x
+jumpBack = tryModifyState (popElement >>> maybeToChoice)
+
+-- | Support function for "procedural" converters: jump to an element, execute
+-- a converter, jump back.
+-- This version is safer than 'executeThere', because it does not rely on the
+-- internal stack. As a result, the converter can not move around in arbitrary
+-- ways. The downside is of course that some of the environment is not
+-- accessible to the converter.
+switchingTheStack :: XMLConverter nsID moreState a b
+ -> XMLConverter nsID moreState (a, XML.Element) b
+switchingTheStack a = second ( (:[]) ^>> swapStack )
+ >>> first a
+ >>> second swapStack
+ >>^ fst
+
+-- | Support function for "procedural" converters: jumps to an element, executes
+-- a converter, jumps back.
+-- Make sure that the converter is well-behaved; that is it should
+-- return to the exact position it started from in /every possible path/ of
+-- execution, even if it "fails". If it does not, you may encounter
+-- strange bugs. If you are not sure about the behaviour or want to use
+-- shortcuts, you can often use 'switchingTheStack' instead.
+executeThere :: FallibleXMLConverter nsID moreState a b
+ -> FallibleXMLConverter nsID moreState (a, XML.Element) b
+executeThere a = second jumpThere
+ >>> fst
+ ^>> a
+ >>> jumpBack -- >>? jumpBack would not ensure the jump.
+ >>^ collapseEither
+
+-- | Do something in a sub-element, tnen come back
+executeIn :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState f s
+ -> FallibleXMLConverter nsID extraState f s
+executeIn nsID name a = keepingTheValue
+ (findChild nsID name)
+ >>> ignoringState liftFailure
+ >>? switchingTheStack a
+ where liftFailure (_, (Left f)) = Left f
+ liftFailure (x, (Right e)) = Right (x, e)
+
+--------------------------------------------------------------------------------
+-- Iterating over children
+--------------------------------------------------------------------------------
+
+-- Helper converter to prepare different types of iterations.
+-- It lifts the children (of a certain type) of the current element
+-- into the value level and pairs each one with the current input value.
+prepareIteration :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> XMLConverter nsID extraState b [(b, XML.Element)]
+prepareIteration nsID name = keepingTheValue
+ (findChildren nsID name)
+ >>% distributeValue
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects results in a 'Monoid'.
+-- Fails completely if any conversion fails.
+collectEvery :: (NameSpaceID nsID, Monoid m)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a m
+ -> FallibleXMLConverter nsID extraState a m
+collectEvery nsID name a = prepareIteration nsID name
+ >>> foldS' (switchingTheStack a)
+
+--
+withEveryL :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a b
+ -> FallibleXMLConverter nsID extraState a [b]
+withEveryL = withEvery
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects results in a 'MonadPlus'.
+-- Fails completely if any conversion fails.
+withEvery :: (NameSpaceID nsID, MonadPlus m)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a b
+ -> FallibleXMLConverter nsID extraState a (m b)
+withEvery nsID name a = prepareIteration nsID name
+ >>> iterateS' (switchingTheStack a)
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects all successful results in a list.
+tryAll :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState b a
+ -> XMLConverter nsID extraState b [a]
+tryAll nsID name a = prepareIteration nsID name
+ >>> iterateS (switchingTheStack a)
+ >>^ collectRights
+
+-- | Applies a converter to every child element of a specific type.
+-- Collects all successful results.
+tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState b a
+ -> XMLConverter nsID extraState b (c a)
+tryAll' nsID name a = prepareIteration nsID name
+ >>> iterateS (switchingTheStack a)
+ >>^ collectRightsF
+
+--------------------------------------------------------------------------------
+-- Matching children
+--------------------------------------------------------------------------------
+
+type IdXMLConverter nsID moreState x
+ = XMLConverter nsID moreState x x
+
+type MaybeEConverter nsID moreState x
+ = Maybe (IdXMLConverter nsID moreState (x, XML.Element))
+
+-- Chainable converter that helps deciding which converter to actually use.
+type ElementMatchConverter nsID extraState x
+ = IdXMLConverter nsID
+ extraState
+ (MaybeEConverter nsID extraState x, XML.Element)
+
+type MaybeCConverter nsID moreState x
+ = Maybe (IdXMLConverter nsID moreState (x, XML.Content))
+
+-- Chainable converter that helps deciding which converter to actually use.
+type ContentMatchConverter nsID extraState x
+ = IdXMLConverter nsID
+ extraState
+ (MaybeCConverter nsID extraState x, XML.Content)
+
+-- Helper function: The @c@ is actually a converter that is to be selected by
+-- matching XML elements to the first two parameters.
+-- The fold used to match elements however is very simple, so to use it,
+-- this function wraps the converter in another converter that unifies
+-- the accumulator. Think of a lot of converters with the resulting type
+-- chained together. The accumulator not only transports the element
+-- unchanged to the next matcher, it also does the actual selecting by
+-- combining the intermediate results with '(<|>)'.
+makeMatcherE :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a a
+ -> ElementMatchConverter nsID extraState a
+makeMatcherE nsID name c = ( second (
+ elemNameIs nsID name
+ >>^ bool Nothing (Just tryC)
+ )
+ >>% (<|>)
+ ) &&&^ snd
+ where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd
+
+-- Helper function: The @c@ is actually a converter that is to be selected by
+-- matching XML content to the first two parameters.
+-- The fold used to match elements however is very simple, so to use it,
+-- this function wraps the converter in another converter that unifies
+-- the accumulator. Think of a lot of converters with the resulting type
+-- chained together. The accumulator not only transports the element
+-- unchanged to the next matcher, it also does the actual selecting by
+-- combining the intermediate results with '(<|>)'.
+makeMatcherC :: (NameSpaceID nsID)
+ => nsID -> ElementName
+ -> FallibleXMLConverter nsID extraState a a
+ -> ContentMatchConverter nsID extraState a
+makeMatcherC nsID name c = ( second ( contentToElem
+ >>> returnV Nothing
+ ||| ( elemNameIs nsID name
+ >>^ bool Nothing (Just cWithJump)
+ )
+ )
+ >>% (<|>)
+ ) &&&^ snd
+ where cWithJump = ( fst
+ ^&&& ( second contentToElem
+ >>> spreadChoice
+ ^>>? executeThere c
+ )
+ >>% recover)
+ &&&^ snd
+ contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element
+ contentToElem = arr $ \e -> case e of
+ XML.Elem e' -> succeedWith e'
+ _ -> failEmpty
+
+-- Creates and chains a bunch of matchers
+prepareMatchersE :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
+ -> ElementMatchConverter nsID extraState x
+--prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE)
+prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE)
+
+-- Creates and chains a bunch of matchers
+prepareMatchersC :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)]
+ -> ContentMatchConverter nsID extraState x
+--prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC)
+prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC)
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all children of the current element
+-- * Matches each group to each child in order (at most one group per child)
+-- * Filters non-matched children
+-- * Chains all found converters in child-order
+-- * Applies the chain to the input element
+matchChildren :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchChildren lookups = let matcher = prepareMatchersE lookups
+ in keepingTheValue (
+ elChildren
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m)
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the element and drop the element
+ -- in the return value
+ swallowElem element converter = (,element) ^>> converter >>^ fst
+
+--
+matchContent'' :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchContent'' lookups = let matcher = prepareMatchersC lookups
+ in keepingTheValue (
+ elContent
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m)
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the content and drop the content
+ -- in the return value
+ swallowContent content converter = (,content) ^>> converter >>^ fst
+
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all content of the current element
+-- * Matches each group to each piece of content in order
+-- (at most one group per piece of content)
+-- * Filters non-matched content
+-- * Chains all found converters in content-order
+-- * Applies the chain to the input element
+matchContent' :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState a a
+matchContent' lookups = matchContent lookups (arr fst)
+
+-- | Takes a list of element-data - converter groups and
+-- * Finds all content of the current element
+-- * Matches each group to each piece of content in order
+-- (at most one group per piece of content)
+-- * Adds a default converter for all non-matched content
+-- * Chains all found converters in content-order
+-- * Applies the chain to the input element
+matchContent :: (NameSpaceID nsID)
+ => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)]
+ -> XMLConverter nsID extraState (a,XML.Content) a
+ -> XMLConverter nsID extraState a a
+matchContent lookups fallback
+ = let matcher = prepareMatchersC lookups
+ in keepingTheValue (
+ elContent
+ >>> map (Nothing,)
+ ^>> iterateSL matcher
+ >>^ map swallowOrFallback
+ -- >>> foldSs
+ >>> reverseComposition
+ )
+ >>> swap
+ ^>> app
+ where
+ -- let the converter swallow the content and drop the content
+ -- in the return value
+ swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst
+ swallowOrFallback (Nothing ,content) = (,content) ^>> fallback
+
+--------------------------------------------------------------------------------
+-- Internals
+--------------------------------------------------------------------------------
+
+stringToBool :: (Monoid failure) => String -> Either failure Bool
+stringToBool val -- stringToBool' val >>> maybeToChoice
+ | val `elem` trueValues = succeedWith True
+ | val `elem` falseValues = succeedWith False
+ | otherwise = failEmpty
+ where trueValues = ["true" ,"on" ,"1"]
+ falseValues = ["false","off","0"]
+
+stringToBool' :: String -> Maybe Bool
+stringToBool' val | val `elem` trueValues = Just True
+ | val `elem` falseValues = Just False
+ | otherwise = Nothing
+ where trueValues = ["true" ,"on" ,"1"]
+ falseValues = ["false","off","0"]
+
+
+distributeValue :: a -> [b] -> [(a,b)]
+distributeValue = map.(,)
+
+--------------------------------------------------------------------------------
+
+{-
+NOTES
+It might be a good idea to refactor the namespace stuff.
+E.g.: if a namespace constructor took a string as a parameter, things like
+> a ?>/< (NsText,"body")
+would be nicer.
+Together with a rename and some trickery, something like
+> |< NsText "body" >< NsText "p" ?> a </> </>|
+might even be possible.
+
+Some day, XML.Light should be replaced by something better.
+While doing that, it might be useful to replace String as the type of element
+names with something else, too. (Of course with OverloadedStrings).
+While doing that, maybe the types can be created in a way that something like
+> NsText:"body"
+could be used. Overloading (:) does not sounds like the best idea, but if the
+element name type was a list, this might be possible.
+Of course that would be a bit hackish, so the "right" way would probably be
+something like
+> InNS NsText "body"
+but isn't that a bit boring? ;)
+-}
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
new file mode 100644
index 000000000..deb009998
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -0,0 +1,110 @@
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt.Namespaces
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Namespaces used in odt files.
+-}
+
+module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
+ ) where
+
+import Data.List ( isPrefixOf )
+import Data.Maybe ( fromMaybe, listToMaybe )
+import qualified Data.Map as M ( empty, insert )
+
+import Text.Pandoc.Readers.Odt.Generic.Namespaces
+
+
+instance NameSpaceID Namespace where
+
+ getInitialIRImap = nsIDmap
+
+ getNamespaceID "" m = Just(m, NsXML)
+ getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri)
+ where asPair nsID = Just (M.insert nsID iri m, nsID)
+
+
+findID :: NameSpaceIRI -> Maybe Namespace
+findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri]
+
+nsIDmap :: NameSpaceIRIs Namespace
+nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
+
+data Namespace = -- Open Document core
+ NsOffice | NsStyle | NsText | NsTable | NsForm
+ | NsDraw | Ns3D | NsAnim | NsChart | NsConfig
+ | NsDB | NsMeta | NsNumber | NsScript | NsManifest
+ | NsPresentation
+ -- Metadata
+ | NsODF
+ -- Compatible elements
+ | NsXSL_FO | NsSVG | NsSmil
+ -- External standards
+ | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL
+ | NsDublinCore
+ -- Metadata manifest
+ | NsPKG
+ -- Others
+ | NsOpenFormula
+ -- Core XML (basically only for the 'id'-attribute)
+ | NsXML
+ -- Fallback
+ | NsOther String
+ deriving ( Eq, Ord, Show )
+
+-- | Not the actual iri's, but large prefixes of them - this way there are
+-- less versioning problems and the like.
+nsIDs :: [(String,Namespace)]
+nsIDs = [
+ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
+ ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
+ ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ),
+ ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ),
+ ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ),
+ ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ),
+ ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ),
+ ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ),
+ ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ),
+ ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ),
+ ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ),
+ ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ),
+ ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ),
+ ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ),
+ ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ),
+ ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ),
+ ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ),
+ ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ),
+ ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ),
+ ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ),
+ ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ),
+ ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ),
+ ("http://purl.org/dc/elements" , NsDublinCore ),
+ ("http://www.w3.org/2003/g/data-view" , NsGRDDL ),
+ ("http://www.w3.org/1998/Math/MathML" , NsMathML ),
+ ("http://www.w3.org/1999/xhtml" , NsXHtml ),
+ ("http://www.w3.org/2002/xforms" , NsXForms ),
+ ("http://www.w3.org/1999/xlink" , NsXLink )
+ ]
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
new file mode 100644
index 000000000..26ba6df82
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -0,0 +1,744 @@
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE Arrows #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Odt.StyleReader
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Reader for the style information in an odt document.
+-}
+
+module Text.Pandoc.Readers.Odt.StyleReader
+( Style (..)
+, StyleName
+, StyleFamily (..)
+, Styles (..)
+, StyleProperties (..)
+, TextProperties (..)
+, ParaProperties (..)
+, VerticalTextPosition (..)
+, ListItemNumberFormat (..)
+, ListLevel
+, ListStyle (..)
+, ListLevelStyle (..)
+, ListLevelType (..)
+, LengthOrPercent (..)
+, lookupStyle
+, getTextProperty
+, getTextProperty'
+, getParaProperty
+, getListStyle
+, getListLevelStyle
+, getStyleFamily
+, lookupDefaultStyle
+, lookupDefaultStyle'
+, lookupListStyleByName
+, getPropertyChain
+, textPropertyChain
+, stylePropertyChain
+, stylePropertyChain'
+, getStylePropertyChain
+, extendedStylePropertyChain
+, extendedStylePropertyChain'
+, liftStyles
+, readStylesAt
+) where
+
+import Control.Arrow
+import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+
+import qualified Data.Foldable as F
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Char ( isDigit )
+import Data.Default
+import Data.List ( unfoldr )
+import Data.Maybe
+
+import qualified Text.XML.Light as XML
+
+import Text.Pandoc.Readers.Odt.Arrows.State
+import Text.Pandoc.Readers.Odt.Arrows.Utils
+
+import Text.Pandoc.Readers.Odt.Generic.Utils
+import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+
+import Text.Pandoc.Readers.Odt.Namespaces
+import Text.Pandoc.Readers.Odt.Base
+
+
+readStylesAt :: XML.Element -> Fallible Styles
+readStylesAt e = runConverter' readAllStyles mempty e
+
+--------------------------------------------------------------------------------
+-- Reader for font declarations and font pitches
+--------------------------------------------------------------------------------
+
+-- Pandoc has no support for different font pitches. Yet knowing them can be
+-- very helpful in cases where Pandoc has more semantics than OpenDocument.
+-- In these cases, the pitch can help deciding as what to define a block of
+-- text. So let's start with a type for font pitches:
+
+data FontPitch = PitchVariable | PitchFixed
+ deriving ( Eq, Show )
+
+instance Lookupable FontPitch where
+ lookupTable = [ ("variable" , PitchVariable)
+ , ("fixed" , PitchFixed )
+ ]
+
+instance Default FontPitch where
+ def = PitchVariable
+
+-- The font pitch can be specifed in a style directly. Normally, however,
+-- it is defined in the font. That is also the specs' recommendation.
+--
+-- Thus, we want
+
+type FontFaceName = String
+
+type FontPitches = M.Map FontFaceName FontPitch
+
+-- To get there, the fonts have to be read and the pitches extracted.
+-- But the resulting map are only needed at one later place, so it should not be
+-- transported on the value level, especially as we already use a state arrow.
+-- So instead, the resulting map is lifted into the state of the reader.
+-- (An alternative might be ImplicitParams, but again, we already have a state.)
+--
+-- So the main style readers will have the types
+type StyleReader a b = XMLReader FontPitches a b
+-- and
+type StyleReaderSafe a b = XMLReaderSafe FontPitches a b
+-- respectively.
+--
+-- But before we can work with these, we need to define the reader that reads
+-- the fonts:
+
+-- | A reader for font pitches
+fontPitchReader :: XMLReader _s _x FontPitches
+fontPitchReader = executeIn NsOffice "font-face-decls" (
+ ( withEveryL NsStyle "font-face" $ liftAsSuccess (
+ findAttr' NsStyle "name"
+ &&&
+ lookupDefaultingAttr NsStyle "font-pitch"
+ )
+ )
+ >>?^ ( M.fromList . (foldl accumLegalPitches []) )
+ )
+ where accumLegalPitches ls (Nothing,_) = ls
+ accumLegalPitches ls (Just n,p) = (n,p):ls
+
+
+-- | A wrapper around the font pitch reader that lifts the result into the
+-- state.
+readFontPitches :: StyleReader x x
+readFontPitches = producingExtraState () () fontPitchReader
+
+
+-- | Looking up a pitch in the state of the arrow.
+--
+-- The function does the following:
+-- * Look for the font pitch in an attribute.
+-- * If that fails, look for the font name, look up the font in the state
+-- and use the pitch from there.
+-- * Return the result in a Maybe
+--
+findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch)
+findPitch = ( lookupAttr NsStyle "font-pitch"
+ `ifFailedDo` findAttr NsStyle "font-name"
+ >>? ( keepingTheValue getExtraState
+ >>% M.lookup
+ >>^ maybeToChoice
+ )
+ )
+ >>> choiceToMaybe
+
+--------------------------------------------------------------------------------
+-- Definitions of main data
+--------------------------------------------------------------------------------
+
+type StyleName = String
+
+-- | There are two types of styles: named styles with a style family and an
+-- optional style parent, and default styles for each style family,
+-- defining default style properties
+data Styles = Styles
+ { stylesByName :: M.Map StyleName Style
+ , listStylesByName :: M.Map StyleName ListStyle
+ , defaultStyleMap :: M.Map StyleFamily StyleProperties
+ }
+ deriving ( Show )
+
+-- Styles from a monoid under union
+instance Monoid Styles where
+ mempty = Styles M.empty M.empty M.empty
+ mappend (Styles sBn1 dSm1 lsBn1)
+ (Styles sBn2 dSm2 lsBn2)
+ = Styles (M.union sBn1 sBn2)
+ (M.union dSm1 dSm2)
+ (M.union lsBn1 lsBn2)
+
+-- Not all families from the specifications are implemented, only those we need.
+-- But there are none that are not mentioned here.
+data StyleFamily = FaText | FaParagraph
+-- | FaTable | FaTableCell | FaTableColumn | FaTableRow
+-- | FaGraphic | FaDrawing | FaChart
+-- | FaPresentation
+-- | FaRuby
+ deriving ( Eq, Ord, Show )
+
+instance Lookupable StyleFamily where
+ lookupTable = [ ( "text" , FaText )
+ , ( "paragraph" , FaParagraph )
+-- , ( "table" , FaTable )
+-- , ( "table-cell" , FaTableCell )
+-- , ( "table-column" , FaTableColumn )
+-- , ( "table-row" , FaTableRow )
+-- , ( "graphic" , FaGraphic )
+-- , ( "drawing-page" , FaDrawing )
+-- , ( "chart" , FaChart )
+-- , ( "presentation" , FaPresentation )
+-- , ( "ruby" , FaRuby )
+ ]
+
+-- | A named style
+data Style = Style { styleFamily :: Maybe StyleFamily
+ , styleParentName :: Maybe StyleName
+ , listStyle :: Maybe StyleName
+ , styleProperties :: StyleProperties
+ }
+ deriving ( Eq, Show )
+
+data StyleProperties = SProps { textProperties :: Maybe TextProperties
+ , paraProperties :: Maybe ParaProperties
+-- , tableColProperties :: Maybe TColProperties
+-- , tableRowProperties :: Maybe TRowProperties
+-- , tableCellProperties :: Maybe TCellProperties
+-- , tableProperties :: Maybe TableProperties
+-- , graphicProperties :: Maybe GraphProperties
+ }
+ deriving ( Eq, Show )
+
+instance Default StyleProperties where
+ def = SProps { textProperties = Just def
+ , paraProperties = Just def
+ }
+
+data TextProperties = PropT { isEmphasised :: Bool
+ , isStrong :: Bool
+ , pitch :: Maybe FontPitch
+ , verticalPosition :: VerticalTextPosition
+ , underline :: Maybe UnderlineMode
+ , strikethrough :: Maybe UnderlineMode
+ }
+ deriving ( Eq, Show )
+
+instance Default TextProperties where
+ def = PropT { isEmphasised = False
+ , isStrong = False
+ , pitch = Just def
+ , verticalPosition = def
+ , underline = Nothing
+ , strikethrough = Nothing
+ }
+
+data ParaProperties = PropP { paraNumbering :: ParaNumbering
+ , indentation :: LengthOrPercent
+ , margin_left :: LengthOrPercent
+ }
+ deriving ( Eq, Show )
+
+instance Default ParaProperties where
+ def = PropP { paraNumbering = NumberingNone
+ , indentation = def
+ , margin_left = def
+ }
+
+----
+-- All the little data types that make up the properties
+----
+
+data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub
+ deriving ( Eq, Show )
+
+instance Default VerticalTextPosition where
+ def = VPosNormal
+
+instance Read VerticalTextPosition where
+ readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ]
+ ++ [ (VPosSuper , s') | ("super" , s') <- lexS ]
+ ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ]
+ where
+ lexS = lex s
+ signumToVPos n | n < 0 = VPosSub
+ | n > 0 = VPosSuper
+ | otherwise = VPosNormal
+
+data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace
+ deriving ( Eq, Show )
+
+instance Lookupable UnderlineMode where
+ lookupTable = [ ( "continuous" , UnderlineModeNormal )
+ , ( "skip-white-space" , UnderlineModeSkipWhitespace )
+ ]
+
+
+data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int
+ deriving ( Eq, Show )
+
+data LengthOrPercent = LengthValueMM Int | PercentValue Int
+ deriving ( Eq, Show )
+
+instance Default LengthOrPercent where
+ def = LengthValueMM 0
+
+instance Read LengthOrPercent where
+ readsPrec _ s =
+ [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s]
+ ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s
+ , (unit , s'') <- reads s'
+ , let lengthMM = estimateInMillimeter
+ length' unit
+ ]
+
+data XslUnit = XslUnitMM | XslUnitCM
+ | XslUnitInch
+ | XslUnitPoints | XslUnitPica
+ | XslUnitPixel
+ | XslUnitEM
+
+instance Show XslUnit where
+ show XslUnitMM = "mm"
+ show XslUnitCM = "cm"
+ show XslUnitInch = "in"
+ show XslUnitPoints = "pt"
+ show XslUnitPica = "pc"
+ show XslUnitPixel = "px"
+ show XslUnitEM = "em"
+
+instance Read XslUnit where
+ readsPrec _ "mm" = [(XslUnitMM , "")]
+ readsPrec _ "cm" = [(XslUnitCM , "")]
+ readsPrec _ "in" = [(XslUnitInch , "")]
+ readsPrec _ "pt" = [(XslUnitPoints , "")]
+ readsPrec _ "pc" = [(XslUnitPica , "")]
+ readsPrec _ "px" = [(XslUnitPixel , "")]
+ readsPrec _ "em" = [(XslUnitEM , "")]
+ readsPrec _ _ = []
+
+-- | Rough conversion of measures into millimeters.
+-- Pixels and em's are actually implemetation dependant/relative measures,
+-- so I could not really easily calculate anything exact here even if I wanted.
+-- But I do not care about exactness right now, as I only use measures
+-- to determine if a paragraph is "indented" or not.
+estimateInMillimeter :: Int -> XslUnit -> Int
+estimateInMillimeter n XslUnitMM = n
+estimateInMillimeter n XslUnitCM = n * 10
+estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4
+estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4
+estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4
+estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4
+estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4
+
+
+----
+-- List styles
+----
+
+type ListLevel = Int
+
+newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle
+ }
+ deriving ( Eq, Show )
+
+--
+getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle
+getListLevelStyle level ListStyle{..} =
+ let (lower , exactHit , _) = M.splitLookup level levelStyles
+ in exactHit <|> fmap fst (M.maxView lower)
+ -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1]
+ -- \^ simpler, but in general less efficient
+
+data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
+ , listItemPrefix :: Maybe String
+ , listItemSuffix :: Maybe String
+ , listItemFormat :: ListItemNumberFormat
+ , listItemStart :: Int
+ }
+ deriving ( Eq, Ord )
+
+instance Show ListLevelStyle where
+ show ListLevelStyle{..} = "<LLS|"
+ ++ (show listLevelType)
+ ++ "|"
+ ++ (maybeToString listItemPrefix)
+ ++ (show listItemFormat)
+ ++ (maybeToString listItemSuffix)
+ ++ ">"
+ where maybeToString = fromMaybe ""
+
+data ListLevelType = LltBullet | LltImage | LltNumbered
+ deriving ( Eq, Ord, Show )
+
+data ListItemNumberFormat = LinfNone
+ | LinfNumber
+ | LinfRomanLC | LinfRomanUC
+ | LinfAlphaLC | LinfAlphaUC
+ | LinfString String
+ deriving ( Eq, Ord )
+
+instance Show ListItemNumberFormat where
+ show LinfNone = ""
+ show LinfNumber = "1"
+ show LinfRomanLC = "i"
+ show LinfRomanUC = "I"
+ show LinfAlphaLC = "a"
+ show LinfAlphaUC = "A"
+ show (LinfString s) = s
+
+instance Default ListItemNumberFormat where
+ def = LinfNone
+
+instance Read ListItemNumberFormat where
+ readsPrec _ "" = [(LinfNone , "")]
+ readsPrec _ "1" = [(LinfNumber , "")]
+ readsPrec _ "i" = [(LinfRomanLC , "")]
+ readsPrec _ "I" = [(LinfRomanUC , "")]
+ readsPrec _ "a" = [(LinfAlphaLC , "")]
+ readsPrec _ "A" = [(LinfAlphaUC , "")]
+ readsPrec _ s = [(LinfString s , "")]
+
+--------------------------------------------------------------------------------
+-- Readers
+--
+-- ...it seems like a whole lot of this should be automatically deriveable
+-- or at least moveable into a class. Most of this is data concealed in
+-- code.
+--------------------------------------------------------------------------------
+
+--
+readAllStyles :: StyleReader _x Styles
+readAllStyles = ( readFontPitches
+ >>?! ( readAutomaticStyles
+ &&& readStyles ))
+ >>?%? chooseMax
+ -- all top elements are always on the same hierarchy level
+
+--
+readStyles :: StyleReader _x Styles
+readStyles = executeIn NsOffice "styles" $ liftAsSuccess
+ $ liftA3 Styles
+ ( tryAll NsStyle "style" readStyle >>^ M.fromList )
+ ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
+ ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList )
+
+--
+readAutomaticStyles :: StyleReader _x Styles
+readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess
+ $ liftA3 Styles
+ ( tryAll NsStyle "style" readStyle >>^ M.fromList )
+ ( tryAll NsText "list-style" readListStyle >>^ M.fromList )
+ ( returnV M.empty )
+
+--
+readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties)
+readDefaultStyle = lookupAttr NsStyle "family"
+ >>?! keepingTheValue readStyleProperties
+
+--
+readStyle :: StyleReader _x (StyleName,Style)
+readStyle = findAttr NsStyle "name"
+ >>?! keepingTheValue
+ ( liftA4 Style
+ ( lookupAttr' NsStyle "family" )
+ ( findAttr' NsStyle "parent-style-name" )
+ ( findAttr' NsStyle "list-style-name" )
+ readStyleProperties
+ )
+
+--
+readStyleProperties :: StyleReaderSafe _x StyleProperties
+readStyleProperties = liftA2 SProps
+ ( readTextProperties >>> choiceToMaybe )
+ ( readParaProperties >>> choiceToMaybe )
+
+--
+readTextProperties :: StyleReader _x TextProperties
+readTextProperties =
+ executeIn NsStyle "text-properties" $ liftAsSuccess
+ ( liftA6 PropT
+ ( searchAttr NsXSL_FO "font-style" False isFontEmphasised )
+ ( searchAttr NsXSL_FO "font-weight" False isFontBold )
+ ( findPitch )
+ ( getAttr NsStyle "text-position" )
+ ( readUnderlineMode )
+ ( readStrikeThroughMode )
+ )
+ where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
+ isFontBold = ("normal",False):("bold",True)
+ :(map ((,True).show) ([100,200..900]::[Int]))
+
+readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
+readUnderlineMode = readLineMode "text-underline-mode"
+ "text-underline-style"
+
+readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
+readStrikeThroughMode = readLineMode "text-line-through-mode"
+ "text-line-through-style"
+
+readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode modeAttr styleAttr = proc x -> do
+ isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
+ mode <- lookupAttr' NsStyle modeAttr -< x
+ if isUL
+ then case mode of
+ Just m -> returnA -< Just m
+ Nothing -> returnA -< Just UnderlineModeNormal
+ else returnA -< Nothing
+ where
+ isLinePresent = [("none",False)] ++ map (,True)
+ [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted"
+ , "long-dash" , "solid" , "wave"
+ ]
+
+--
+readParaProperties :: StyleReader _x ParaProperties
+readParaProperties =
+ executeIn NsStyle "paragraph-properties" $ liftAsSuccess
+ ( liftA3 PropP
+ ( liftA2 readNumbering
+ ( isSet' NsText "number-lines" )
+ ( readAttr' NsText "line-number" )
+ )
+ ( liftA2 readIndentation
+ ( isSetWithDefault NsStyle "auto-text-indent" False )
+ ( getAttr NsXSL_FO "text-indent" )
+ )
+ ( getAttr NsXSL_FO "margin-left" )
+ )
+ where readNumbering (Just True) (Just n) = NumberingRestart n
+ readNumbering (Just True) _ = NumberingKeep
+ readNumbering _ _ = NumberingNone
+
+ readIndentation False indent = indent
+ readIndentation True _ = def
+
+----
+-- List styles
+----
+
+--
+readListStyle :: StyleReader _x (StyleName, ListStyle)
+readListStyle =
+ findAttr NsStyle "name"
+ >>?! keepingTheValue
+ ( liftA ListStyle
+ $ ( liftA3 SM.union3
+ ( readListLevelStyles NsText "list-level-style-number" LltNumbered )
+ ( readListLevelStyles NsText "list-level-style-bullet" LltBullet )
+ ( readListLevelStyles NsText "list-level-style-image" LltImage )
+ ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle
+ )
+--
+readListLevelStyles :: Namespace -> ElementName
+ -> ListLevelType
+ -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle)
+readListLevelStyles namespace elementName levelType =
+ ( tryAll namespace elementName (readListLevelStyle levelType)
+ >>^ SM.fromList
+ )
+
+--
+readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
+readListLevelStyle levelType = readAttr NsText "level"
+ >>?! keepingTheValue
+ ( liftA5 toListLevelStyle
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ ( findAttr' NsText "start-value" )
+ )
+ where
+ toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)
+ toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b)
+ toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b)
+ startValue (Just "") = 1
+ startValue (Just v) = if all isDigit v
+ then read v
+ else 1
+ startValue Nothing = 1
+
+--
+chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
+chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
+ | otherwise = Just ( F.foldr1 select ls )
+ where
+ select ( ListLevelStyle t1 p1 s1 f1 b1 )
+ ( ListLevelStyle t2 p2 s2 f2 _ )
+ = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+ select' LltNumbered _ = LltNumbered
+ select' _ LltNumbered = LltNumbered
+ select' _ _ = LltBullet
+ selectLinf LinfNone f2 = f2
+ selectLinf f1 LinfNone = f1
+ selectLinf (LinfString _) f2 = f2
+ selectLinf f1 (LinfString _) = f1
+ selectLinf f1 _ = f1
+
+
+--------------------------------------------------------------------------------
+-- Tools to access style data
+--------------------------------------------------------------------------------
+
+--
+lookupStyle :: StyleName -> Styles -> Maybe Style
+lookupStyle name Styles{..} = M.lookup name stylesByName
+
+--
+lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties
+lookupDefaultStyle family Styles{..} = fromMaybe def
+ (M.lookup family defaultStyleMap)
+
+--
+lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties
+lookupDefaultStyle' Styles{..} family = fromMaybe def
+ (M.lookup family defaultStyleMap)
+
+--
+getListStyle :: Style -> Styles -> Maybe ListStyle
+getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles)
+
+--
+lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle
+lookupListStyleByName name Styles{..} = M.lookup name listStylesByName
+
+
+-- | Returns a chain of parent of the current style. The direct parent will
+-- be the first element of the list, followed by its parent and so on.
+-- The current style is not in the list.
+parents :: Style -> Styles -> [Style]
+parents style styles = unfoldr findNextParent style -- Ha!
+ where findNextParent Style{..}
+ = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName
+
+-- | Looks up the style family of the current style. Normally, every style
+-- should have one. But if not, all parents are searched.
+getStyleFamily :: Style -> Styles -> Maybe StyleFamily
+getStyleFamily style@Style{..} styles
+ = styleFamily
+ <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles)
+
+-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property
+-- values are specified. Instead, a value might be inherited from a
+-- parent style. This function makes this chain of inheritance
+-- concrete and easily accessible by encapsulating the necessary lookups.
+-- The resulting list contains the direct properties of the style as the first
+-- element, the ones of the direct parent element as the next one, and so on.
+--
+-- Note: There should also be default properties for each style family. These
+-- are @not@ contained in this list because properties inherited from
+-- parent elements take precedence over default styles.
+--
+-- This function is primarily meant to be used through convenience wrappers.
+--
+stylePropertyChain :: Style -> Styles -> [StyleProperties]
+stylePropertyChain style styles
+ = map styleProperties (style : parents style styles)
+
+--
+extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties]
+extendedStylePropertyChain [] _ = []
+extendedStylePropertyChain [style] styles = (stylePropertyChain style styles)
+ ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
+extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles)
+ ++ (extendedStylePropertyChain trace styles)
+-- Optimizable with Data.Sequence
+
+--
+extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties]
+extendedStylePropertyChain' [] _ = Nothing
+extendedStylePropertyChain' [style] styles = Just (
+ (stylePropertyChain style styles)
+ ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles)))
+ )
+extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++)
+ (extendedStylePropertyChain' trace styles)
+
+--
+stylePropertyChain' :: Styles -> Style -> [StyleProperties]
+stylePropertyChain' = flip stylePropertyChain
+
+--
+getStylePropertyChain :: StyleName -> Styles -> [StyleProperties]
+getStylePropertyChain name styles = maybe []
+ (`stylePropertyChain` styles)
+ (lookupStyle name styles)
+
+--
+getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a]
+getPropertyChain extract style styles = catMaybes
+ $ map extract
+ $ stylePropertyChain style styles
+
+--
+textPropertyChain :: Style -> Styles -> [TextProperties]
+textPropertyChain = getPropertyChain textProperties
+
+--
+paraPropertyChain :: Style -> Styles -> [ParaProperties]
+paraPropertyChain = getPropertyChain paraProperties
+
+--
+getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a
+getTextProperty extract style styles = fmap extract
+ $ listToMaybe
+ $ textPropertyChain style styles
+
+--
+getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a
+getTextProperty' extract style styles = F.asum
+ $ map extract
+ $ textPropertyChain style styles
+
+--
+getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a
+getParaProperty extract style styles = fmap extract
+ $ listToMaybe
+ $ paraPropertyChain style styles
+
+-- | Lifts the reader into another readers' state.
+liftStyles :: (OdtConverterState s -> OdtConverterState Styles)
+ -> (OdtConverterState Styles -> OdtConverterState s )
+ -> XMLReader s x x
+liftStyles extract inject = switchState extract inject
+ $ convertingExtraState M.empty readAllStyles
+
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
new file mode 100644
index 000000000..c8dbbf45a
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -0,0 +1,62 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Conversion of org-mode formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Org ( readOrg ) where
+
+import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
+import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
+import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState )
+
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+
+import Control.Monad.Except ( throwError )
+import Control.Monad.Reader ( runReaderT )
+
+
+-- | Parse org-mode string and return a Pandoc document.
+readOrg :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readOrg opts s = do
+ parsed <- flip runReaderT def $
+ readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left _ -> throwError $ PandocParseError "problem parsing org"
+
+--
+-- Parser
+--
+parseOrg :: PandocMonad m => OrgParser m Pandoc
+parseOrg = do
+ blocks' <- blockList
+ meta' <- meta
+ return $ Pandoc meta' blocks'
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
new file mode 100644
index 000000000..5588c4552
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -0,0 +1,137 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.BlockStarts
+ ( exampleLineStart
+ , hline
+ , noteMarker
+ , tableStart
+ , drawerStart
+ , headerStart
+ , metaLineStart
+ , latexEnvStart
+ , commentLineStart
+ , bulletListStart
+ , orderedListStart
+ , endOfBlock
+ ) where
+
+import Control.Monad ( void )
+import Text.Pandoc.Readers.Org.Parsing
+
+-- | Horizontal Line (five -- dashes or more)
+hline :: Monad m => OrgParser m ()
+hline = try $ do
+ skipSpaces
+ string "-----"
+ many (char '-')
+ skipSpaces
+ newline
+ return ()
+
+-- | Read the start of a header line, return the header level
+headerStart :: Monad m => OrgParser m Int
+headerStart = try $
+ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
+
+tableStart :: Monad m => OrgParser m Char
+tableStart = try $ skipSpaces *> char '|'
+
+latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart = try $ do
+ skipSpaces *> string "\\begin{"
+ *> latexEnvName
+ <* string "}"
+ <* blankline
+ where
+ latexEnvName :: Monad m => OrgParser m String
+ latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+
+
+-- | Parses bullet list marker.
+bulletListStart :: Monad m => OrgParser m ()
+bulletListStart = try $
+ choice
+ [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
+ , () <$ skipSpaces1 <* char '*' <* skipSpaces1
+ ]
+
+genericListStart :: Monad m
+ => OrgParser m String
+ -> OrgParser m Int
+genericListStart listMarker = try $
+ (+) <$> (length <$> many spaceChar)
+ <*> (length <$> listMarker <* many1 spaceChar)
+
+orderedListStart :: Monad m => OrgParser m Int
+orderedListStart = genericListStart orderedListMarker
+ -- Ordered list markers allowed in org-mode
+ where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+drawerStart :: Monad m => OrgParser m String
+drawerStart = try $
+ skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+
+metaLineStart :: Monad m => OrgParser m ()
+metaLineStart = try $ skipSpaces <* string "#+"
+
+commentLineStart :: Monad m => OrgParser m ()
+commentLineStart = try $ skipSpaces <* string "# "
+
+exampleLineStart :: Monad m => OrgParser m ()
+exampleLineStart = () <$ try (skipSpaces *> string ": ")
+
+noteMarker :: Monad m => OrgParser m String
+noteMarker = try $ do
+ char '['
+ choice [ many1Till digit (char ']')
+ , (++) <$> string "fn:"
+ <*> many1Till (noneOf "\n\r\t ") (char ']')
+ ]
+
+-- | Succeeds if the parser is at the end of a block.
+endOfBlock :: Monad m => OrgParser m ()
+endOfBlock = lookAhead . try $ do
+ void blankline <|> anyBlockStart
+ where
+ -- Succeeds if there is a new block starting at this position.
+ anyBlockStart :: Monad m => OrgParser m ()
+ anyBlockStart = try . choice $
+ [ exampleLineStart
+ , hline
+ , metaLineStart
+ , commentLineStart
+ , void noteMarker
+ , void tableStart
+ , void drawerStart
+ , void headerStart
+ , void latexEnvStart
+ , void bulletListStart
+ , void orderedListStart
+ ]
+
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
new file mode 100644
index 000000000..78ac8d0d1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -0,0 +1,979 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2017 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode block elements.
+-}
+module Text.Pandoc.Readers.Org.Blocks
+ ( blockList
+ , meta
+ ) where
+
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.Inlines
+import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine )
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks )
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead )
+
+import Control.Monad ( foldM, guard, mzero, void )
+import Data.Char ( isSpace, toLower, toUpper)
+import Data.Default ( Default )
+import Data.List ( foldl', isPrefixOf )
+import Data.Maybe ( fromMaybe, isNothing )
+import Data.Monoid ((<>))
+
+--
+-- Org headers
+--
+newtype Tag = Tag { fromTag :: String }
+ deriving (Show, Eq)
+
+-- | Create a tag containing the given string.
+toTag :: String -> Tag
+toTag = Tag
+
+-- | The key (also called name or type) of a property.
+newtype PropertyKey = PropertyKey { fromKey :: String }
+ deriving (Show, Eq, Ord)
+
+-- | Create a property key containing the given string. Org mode keys are
+-- case insensitive and are hence converted to lower case.
+toPropertyKey :: String -> PropertyKey
+toPropertyKey = PropertyKey . map toLower
+
+-- | The value assigned to a property.
+newtype PropertyValue = PropertyValue { fromValue :: String }
+
+-- | Create a property value containing the given string.
+toPropertyValue :: String -> PropertyValue
+toPropertyValue = PropertyValue
+
+-- | Check whether the property value is non-nil (i.e. truish).
+isNonNil :: PropertyValue -> Bool
+isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+
+-- | Key/value pairs from a PROPERTIES drawer
+type Properties = [(PropertyKey, PropertyValue)]
+
+-- | Org mode headline (i.e. a document subtree).
+data Headline = Headline
+ { headlineLevel :: Int
+ , headlineTodoMarker :: Maybe TodoMarker
+ , headlineText :: Inlines
+ , headlineTags :: [Tag]
+ , headlineProperties :: Properties
+ , headlineContents :: Blocks
+ , headlineChildren :: [Headline]
+ }
+
+--
+-- Parsing headlines and subtrees
+--
+
+-- | Read an Org mode headline and its contents (i.e. a document subtree).
+-- @lvl@ gives the minimum acceptable level of the tree.
+headline :: PandocMonad m => Int -> OrgParser m (F Headline)
+headline lvl = try $ do
+ level <- headerStart
+ guard (lvl <= level)
+ todoKw <- optionMaybe todoKeyword
+ title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
+ tags <- option [] headerTags
+ newline
+ properties <- option mempty propertiesDrawer
+ contents <- blocks
+ children <- many (headline (level + 1))
+ return $ do
+ title' <- title
+ contents' <- contents
+ children' <- sequence children
+ return $ Headline
+ { headlineLevel = level
+ , headlineTodoMarker = todoKw
+ , headlineText = title'
+ , headlineTags = tags
+ , headlineProperties = properties
+ , headlineContents = contents'
+ , headlineChildren = children'
+ }
+ where
+ endOfTitle :: Monad m => OrgParser m ()
+ endOfTitle = void . lookAhead $ optional headerTags *> newline
+
+ headerTags :: Monad m => OrgParser m [Tag]
+ headerTags = try $
+ let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
+
+-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
+headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
+headlineToBlocks hdln@(Headline {..}) = do
+ maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+ case () of
+ _ | any isNoExportTag headlineTags -> return mempty
+ _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
+ _ | isCommentTitle headlineText -> return mempty
+ _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
+ _ | otherwise -> headlineToHeaderWithContents hdln
+
+isNoExportTag :: Tag -> Bool
+isNoExportTag = (== toTag "noexport")
+
+isArchiveTag :: Tag -> Bool
+isArchiveTag = (== toTag "ARCHIVE")
+
+-- | Check if the title starts with COMMENT.
+-- FIXME: This accesses builder internals not intended for use in situations
+-- like these. Replace once keyword parsing is supported.
+isCommentTitle :: Inlines -> Bool
+isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
+isCommentTitle _ = False
+
+archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
+archivedHeadlineToBlocks hdln = do
+ archivedTreesOption <- getExportSetting exportArchivedTrees
+ case archivedTreesOption of
+ ArchivedTreesNoExport -> return mempty
+ ArchivedTreesExport -> headlineToHeaderWithContents hdln
+ ArchivedTreesHeadlineOnly -> headlineToHeader hdln
+
+headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeaderWithList hdln@(Headline {..}) = do
+ maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+ header <- headlineToHeader hdln
+ listElements <- sequence (map headlineToBlocks headlineChildren)
+ let listBlock = if null listElements
+ then mempty
+ else B.orderedList listElements
+ let headerText = if maxHeadlineLevels == headlineLevel
+ then header
+ else flattenHeader header
+ return $ headerText <> headlineContents <> listBlock
+ where
+ flattenHeader :: Blocks -> Blocks
+ flattenHeader blks =
+ case B.toList blks of
+ (Header _ _ inlns:_) -> B.para (B.fromList inlns)
+ _ -> mempty
+
+headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeaderWithContents hdln@(Headline {..}) = do
+ header <- headlineToHeader hdln
+ childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
+ return $ header <> headlineContents <> childrenBlocks
+
+headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
+headlineToHeader (Headline {..}) = do
+ exportTodoKeyword <- getExportSetting exportWithTodoKeywords
+ let todoText = if exportTodoKeyword
+ then case headlineTodoMarker of
+ Just kw -> todoKeywordToInlines kw <> B.space
+ Nothing -> mempty
+ else mempty
+ let text = tagTitle (todoText <> headlineText) headlineTags
+ let propAttr = propertiesToAttr headlineProperties
+ attr <- registerHeader propAttr headlineText
+ return $ B.headerWith attr headlineLevel text
+
+todoKeyword :: Monad m => OrgParser m TodoMarker
+todoKeyword = try $ do
+ taskStates <- activeTodoMarkers <$> getState
+ let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
+ choice (map kwParser taskStates)
+
+todoKeywordToInlines :: TodoMarker -> Inlines
+todoKeywordToInlines tdm =
+ let todoText = todoMarkerName tdm
+ todoState = map toLower . show $ todoMarkerState tdm
+ classes = [todoState, todoText]
+ in B.spanWith (mempty, classes, mempty) (B.str todoText)
+
+propertiesToAttr :: Properties -> Attr
+propertiesToAttr properties =
+ let
+ toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
+ customIdKey = toPropertyKey "custom_id"
+ classKey = toPropertyKey "class"
+ unnumberedKey = toPropertyKey "unnumbered"
+ specialProperties = [customIdKey, classKey, unnumberedKey]
+ id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
+ cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
+ kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ $ properties
+ isUnnumbered =
+ fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties
+ in
+ (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs')
+
+tagTitle :: Inlines -> [Tag] -> Inlines
+tagTitle title tags = title <> (mconcat $ map tagToInline tags)
+
+tagToInline :: Tag -> Inlines
+tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
+
+
+--
+-- parsing blocks
+--
+
+-- | Get a list of blocks.
+blockList :: PandocMonad m => OrgParser m [Block]
+blockList = do
+ initialBlocks <- blocks
+ headlines <- sequence <$> manyTill (headline 1) eof
+ st <- getState
+ headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st
+ return . B.toList $ (runF initialBlocks st) <> headlineBlocks
+
+-- | Get the meta information safed in the state.
+meta :: Monad m => OrgParser m Meta
+meta = do
+ meta' <- metaExport
+ runF meta' <$> getState
+
+blocks :: PandocMonad m => OrgParser m (F Blocks)
+blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
+
+block :: PandocMonad m => OrgParser m (F Blocks)
+block = choice [ mempty <$ blanklines
+ , table
+ , orgBlock
+ , figure
+ , example
+ , genericDrawer
+ , specialLine
+ , horizontalRule
+ , list
+ , latexFragment
+ , noteBlock
+ , paraOrPlain
+ ] <?> "block"
+
+
+--
+-- Block Attributes
+--
+
+-- | Attributes that may be added to figures (like a name or caption).
+data BlockAttributes = BlockAttributes
+ { blockAttrName :: Maybe String
+ , blockAttrLabel :: Maybe String
+ , blockAttrCaption :: Maybe (F Inlines)
+ , blockAttrKeyValues :: [(String, String)]
+ }
+
+-- | Convert BlockAttributes into pandoc Attr
+attrFromBlockAttributes :: BlockAttributes -> Attr
+attrFromBlockAttributes (BlockAttributes{..}) =
+ let
+ ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
+ classes = case lookup "class" blockAttrKeyValues of
+ Nothing -> []
+ Just clsStr -> words clsStr
+ kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
+ in (ident, classes, kv)
+
+stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String)
+stringyMetaAttribute attrCheck = try $ do
+ metaLineStart
+ attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ guard $ attrCheck attrName
+ skipSpaces
+ attrValue <- anyLine
+ return (attrName, attrValue)
+
+blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
+blockAttributes = try $ do
+ kv <- many (stringyMetaAttribute attrCheck)
+ let caption = foldl' (appendValues "CAPTION") Nothing kv
+ let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
+ let name = lookup "NAME" kv
+ let label = lookup "LABEL" kv
+ caption' <- case caption of
+ Nothing -> return Nothing
+ Just s -> Just <$> parseFromString inlines (s ++ "\n")
+ kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ return $ BlockAttributes
+ { blockAttrName = name
+ , blockAttrLabel = label
+ , blockAttrCaption = caption'
+ , blockAttrKeyValues = kvAttrs'
+ }
+ where
+ attrCheck :: String -> Bool
+ attrCheck attr =
+ case attr of
+ "NAME" -> True
+ "LABEL" -> True
+ "CAPTION" -> True
+ "ATTR_HTML" -> True
+ _ -> False
+
+ appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues attrName accValue (key, value) =
+ if key /= attrName
+ then accValue
+ else case accValue of
+ Just acc -> Just $ acc ++ ' ':value
+ Nothing -> Just value
+
+keyValues :: Monad m => OrgParser m [(String, String)]
+keyValues = try $
+ manyTill ((,) <$> key <*> value) newline
+ where
+ key :: Monad m => OrgParser m String
+ key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+
+ value :: Monad m => OrgParser m String
+ value = skipSpaces *> manyTill anyChar endOfValue
+
+ endOfValue :: Monad m => OrgParser m ()
+ endOfValue =
+ lookAhead $ (() <$ try (many1 spaceChar <* key))
+ <|> () <$ newline
+
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
+orgBlock :: PandocMonad m => OrgParser m (F Blocks)
+orgBlock = try $ do
+ blockAttrs <- blockAttributes
+ blkType <- blockHeaderStart
+ ($ blkType) $
+ case (map toLower blkType) of
+ "export" -> exportBlock
+ "comment" -> rawBlockLines (const mempty)
+ "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
+ "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
+ "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
+ "example" -> rawBlockLines (return . exampleCode)
+ "quote" -> parseBlockLines (fmap B.blockQuote)
+ "verse" -> verseBlock
+ "src" -> codeBlock blockAttrs
+ _ -> parseBlockLines $
+ let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
+ in fmap $ B.divWith (ident, classes ++ [blkType], kv)
+ where
+ blockHeaderStart :: Monad m => OrgParser m String
+ blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
+
+ lowercase :: String -> String
+ lowercase = map toLower
+
+rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
+
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
+ where
+ parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
+ parsedBlockContent = try $ do
+ raw <- rawBlockContent blockType
+ parseFromString blocks (raw ++ "\n")
+
+-- | Read the raw string content of a block
+rawBlockContent :: Monad m => String -> OrgParser m String
+rawBlockContent blockType = try $ do
+ blkLines <- manyTill rawLine blockEnder
+ tabLen <- getOption readerTabStop
+ return
+ . unlines
+ . stripIndent
+ . map (tabsToSpaces tabLen . commaEscaped)
+ $ blkLines
+ where
+ rawLine :: Monad m => OrgParser m String
+ rawLine = try $ ("" <$ blankline) <|> anyLine
+
+ blockEnder :: Monad m => OrgParser m ()
+ blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
+
+ stripIndent :: [String] -> [String]
+ stripIndent strs = map (drop (shortestIndent strs)) strs
+
+ shortestIndent :: [String] -> Int
+ shortestIndent = foldr min maxBound
+ . map (length . takeWhile isSpace)
+ . filter (not . null)
+
+ tabsToSpaces :: Int -> String -> String
+ tabsToSpaces _ [] = []
+ tabsToSpaces tabLen cs'@(c:cs) =
+ case c of
+ ' ' -> ' ':tabsToSpaces tabLen cs
+ '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs
+ _ -> cs'
+
+ commaEscaped :: String -> String
+ commaEscaped (',':cs@('*':_)) = cs
+ commaEscaped (',':cs@('#':'+':_)) = cs
+ commaEscaped (' ':cs) = ' ':commaEscaped cs
+ commaEscaped ('\t':cs) = '\t':commaEscaped cs
+ commaEscaped cs = cs
+
+-- | Read but ignore all remaining block headers.
+ignHeaders :: Monad m => OrgParser m ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+-- | Read a block containing code intended for export in specific backends
+-- only.
+exportBlock :: Monad m => String -> OrgParser m (F Blocks)
+exportBlock blockType = try $ do
+ exportType <- skipSpaces *> orgArgWord <* ignHeaders
+ contents <- rawBlockContent blockType
+ returnF (B.rawBlock (map toLower exportType) contents)
+
+verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
+verseBlock blockType = try $ do
+ ignHeaders
+ content <- rawBlockContent blockType
+ fmap B.lineBlock . sequence
+ <$> mapM parseVerseLine (lines content)
+ where
+ -- replace initial spaces with nonbreaking spaces to preserve
+ -- indentation, parse the rest as normal inline
+ parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
+ parseVerseLine cs = do
+ let (initialSpaces, indentedLine) = span isSpace cs
+ let nbspIndent = if null initialSpaces
+ then mempty
+ else B.str $ map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine ++ "\n")
+ return (trimInlinesF $ pure nbspIndent <> line)
+
+-- | Read a code block and the associated results block if present. Which of
+-- boths blocks is included in the output is determined using the "exports"
+-- argument in the block header.
+codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+codeBlock blockAttrs blockType = do
+ skipSpaces
+ (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+ content <- rawBlockContent blockType
+ resultsContent <- trailingResultsBlock
+ let id' = fromMaybe mempty $ blockAttrName blockAttrs
+ let includeCode = exportsCode kv
+ let includeResults = exportsResults kv
+ let codeBlck = B.codeBlockWith ( id', classes, kv ) content
+ let labelledBlck = maybe (pure codeBlck)
+ (labelDiv codeBlck)
+ (blockAttrCaption blockAttrs)
+ let resultBlck = fromMaybe mempty resultsContent
+ return $
+ (if includeCode then labelledBlck else mempty) <>
+ (if includeResults then resultBlck else mempty)
+ where
+ labelDiv :: Blocks -> F Inlines -> F Blocks
+ labelDiv blk value =
+ B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+
+ labelledBlock :: F Inlines -> F Blocks
+ labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+ || ("rundoc-exports", "results") `elem` attrs)
+
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+ || ("rundoc-exports", "both") `elem` attrs
+
+trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks))
+trailingResultsBlock = optionMaybe . try $ do
+ blanklines
+ stringAnyCase "#+RESULTS:"
+ blankline
+ block
+
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
+codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
+codeHeaderArgs = try $ do
+ language <- skipSpaces *> orgArgWord
+ _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+ parameters <- manyTill blockOption newline
+ let pandocLang = translateLang language
+ return $
+ if hasRundocParameters parameters
+ then ( [ pandocLang, rundocBlockClass ]
+ , map toRundocAttrib (("language", language) : parameters)
+ )
+ else ([ pandocLang ], parameters)
+ where
+ hasRundocParameters = not . null
+
+switch :: Monad m => OrgParser m (Char, Maybe String)
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+ simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+ lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+ (string "-l \"" *> many1Till nonspaceChar (char '"'))
+
+blockOption :: Monad m => OrgParser m (String, String)
+blockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgParamValue
+ return (argKey, paramValue)
+
+orgParamValue :: Monad m => OrgParser m String
+orgParamValue = try $
+ skipSpaces
+ *> notFollowedBy (char ':' )
+ *> many1 nonspaceChar
+ <* skipSpaces
+
+horizontalRule :: Monad m => OrgParser m (F Blocks)
+horizontalRule = return B.horizontalRule <$ try hline
+
+
+--
+-- Drawers
+--
+
+-- | A generic drawer which has no special meaning for org-mode.
+-- Whether or not this drawer is included in the output depends on the drawers
+-- export setting.
+genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
+genericDrawer = try $ do
+ name <- map toUpper <$> drawerStart
+ content <- manyTill drawerLine (try drawerEnd)
+ state <- getState
+ -- Include drawer if it is explicitly included in or not explicitly excluded
+ -- from the list of drawers that should be exported. PROPERTIES drawers are
+ -- never exported.
+ case (exportDrawers . orgStateExportSettings $ state) of
+ _ | name == "PROPERTIES" -> return mempty
+ Left names | name `elem` names -> return mempty
+ Right names | name `notElem` names -> return mempty
+ _ -> drawerDiv name <$> parseLines content
+ where
+ parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
+ parseLines = parseFromString blocks . (++ "\n") . unlines
+
+ drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
+
+drawerLine :: Monad m => OrgParser m String
+drawerLine = anyLine
+
+drawerEnd :: Monad m => OrgParser m String
+drawerEnd = try $
+ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
+-- within.
+propertiesDrawer :: Monad m => OrgParser m Properties
+propertiesDrawer = try $ do
+ drawerType <- drawerStart
+ guard $ map toUpper drawerType == "PROPERTIES"
+ manyTill property (try drawerEnd)
+ where
+ property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
+ property = try $ (,) <$> key <*> value
+
+ key :: Monad m => OrgParser m PropertyKey
+ key = fmap toPropertyKey . try $
+ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+
+ value :: Monad m => OrgParser m PropertyValue
+ value = fmap toPropertyValue . try $
+ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+
+
+--
+-- Figures
+--
+
+-- | Figures or an image paragraph (i.e. an image on a line by itself). Only
+-- images with a caption attribute are interpreted as figures.
+figure :: PandocMonad m => OrgParser m (F Blocks)
+figure = try $ do
+ figAttrs <- blockAttributes
+ src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
+ case cleanLinkString src of
+ Nothing -> mzero
+ Just imgSrc -> do
+ guard (isImageFilename imgSrc)
+ let isFigure = not . isNothing $ blockAttrCaption figAttrs
+ return $ imageBlock isFigure figAttrs imgSrc
+ where
+ selfTarget :: PandocMonad m => OrgParser m String
+ selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+ imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
+ imageBlock isFigure figAttrs imgSrc =
+ let
+ figName = fromMaybe mempty $ blockAttrName figAttrs
+ figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
+ figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+ figKeyVals = blockAttrKeyValues figAttrs
+ attr = (figLabel, mempty, figKeyVals)
+ figTitle = (if isFigure then withFigPrefix else id) figName
+ in
+ B.para . B.imageWith attr imgSrc figTitle <$> figCaption
+
+ withFigPrefix :: String -> String
+ withFigPrefix cs =
+ if "fig:" `isPrefixOf` cs
+ then cs
+ else "fig:" ++ cs
+
+-- | Succeeds if looking at the end of the current paragraph
+endOfParagraph :: Monad m => OrgParser m ()
+endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
+
+
+--
+-- Examples
+--
+
+-- | Example code marked up by a leading colon.
+example :: Monad m => OrgParser m (F Blocks)
+example = try $ do
+ return . return . exampleCode =<< unlines <$> many1 exampleLine
+ where
+ exampleLine :: Monad m => OrgParser m String
+ exampleLine = try $ exampleLineStart *> anyLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
+
+
+--
+-- Comments, Options and Metadata
+--
+
+specialLine :: PandocMonad m => OrgParser m (F Blocks)
+specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
+
+rawExportLine :: PandocMonad m => OrgParser m Blocks
+rawExportLine = try $ do
+ metaLineStart
+ key <- metaKey
+ if key `elem` ["latex", "html", "texinfo", "beamer"]
+ then B.rawBlock key <$> anyLine
+ else mzero
+
+commentLine :: Monad m => OrgParser m Blocks
+commentLine = commentLineStart *> anyLine *> pure mempty
+
+
+--
+-- Tables
+--
+data ColumnProperty = ColumnProperty
+ { columnAlignment :: Maybe Alignment
+ , columnRelWidth :: Maybe Int
+ } deriving (Show, Eq)
+
+instance Default ColumnProperty where
+ def = ColumnProperty Nothing Nothing
+
+data OrgTableRow = OrgContentRow (F [Blocks])
+ | OrgAlignRow [ColumnProperty]
+ | OrgHlineRow
+
+-- OrgTable is strongly related to the pandoc table ADT. Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad. The final table
+-- should be generated using a builder function.
+data OrgTable = OrgTable
+ { orgTableColumnProperties :: [ColumnProperty]
+ , orgTableHeader :: [Blocks]
+ , orgTableRows :: [[Blocks]]
+ }
+
+table :: PandocMonad m => OrgParser m (F Blocks)
+table = try $ do
+ blockAttrs <- blockAttributes
+ lookAhead tableStart
+ do
+ rows <- tableRows
+ let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
+ return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
+
+orgToPandocTable :: OrgTable
+ -> Inlines
+ -> Blocks
+orgToPandocTable (OrgTable colProps heads lns) caption =
+ let totalWidth = if any (not . isNothing) (map columnRelWidth colProps)
+ then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
+ else Nothing
+ in B.table caption (map (convertColProp totalWidth) colProps) heads lns
+ where
+ convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double)
+ convertColProp totalWidth colProp =
+ let
+ align' = fromMaybe AlignDefault $ columnAlignment colProp
+ width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
+ <$> (columnRelWidth colProp)
+ <*> totalWidth
+ in (align', width')
+
+tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
+tableContentRow = try $
+ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
+
+tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
+tableContentCell = try $
+ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
+
+tableAlignRow :: Monad m => OrgParser m OrgTableRow
+tableAlignRow = try $ do
+ tableStart
+ colProps <- many1Till columnPropertyCell newline
+ -- Empty rows are regular (i.e. content) rows, not alignment rows.
+ guard $ any (/= def) colProps
+ return $ OrgAlignRow colProps
+
+columnPropertyCell :: Monad m => OrgParser m ColumnProperty
+columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
+ where
+ emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell)
+ propCell = try $ ColumnProperty
+ <$> (skipSpaces
+ *> char '<'
+ *> optionMaybe tableAlignFromChar)
+ <*> (optionMaybe (many1 digit >>= safeRead)
+ <* char '>'
+ <* emptyCell)
+
+tableAlignFromChar :: Monad m => OrgParser m Alignment
+tableAlignFromChar = try $
+ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
+
+tableHline :: Monad m => OrgParser m OrgTableRow
+tableHline = try $
+ OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+endOfCell :: Monad m => OrgParser m Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
+rowsToTable :: [OrgTableRow]
+ -> F OrgTable
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
+
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable colProps heads rows) =
+ OrgTable colProps' heads rows
+ where
+ refRow = if heads /= mempty
+ then heads
+ else case rows of
+ (r:_) -> r
+ _ -> mempty
+ cols = length refRow
+ fillColumns base padding = take cols $ base ++ repeat padding
+ colProps' = fillColumns colProps def
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header. All other horizontal lines are discarded.
+rowToContent :: OrgTable
+ -> OrgTableRow
+ -> F OrgTable
+rowToContent orgTable row =
+ case row of
+ OrgHlineRow -> return singleRowPromotedToHeader
+ OrgAlignRow props -> return . setProperties $ props
+ OrgContentRow cs -> appendToBody cs
+ where
+ singleRowPromotedToHeader :: OrgTable
+ singleRowPromotedToHeader = case orgTable of
+ OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+ orgTable{ orgTableHeader = b , orgTableRows = [] }
+ _ -> orgTable
+
+ setProperties :: [ColumnProperty] -> OrgTable
+ setProperties ps = orgTable{ orgTableColumnProperties = ps }
+
+ appendToBody :: F [Blocks] -> F OrgTable
+ appendToBody frow = do
+ newRow <- frow
+ let oldRows = orgTableRows orgTable
+ -- NOTE: This is an inefficient O(n) operation. This should be changed
+ -- if performance ever becomes a problem.
+ return orgTable{ orgTableRows = oldRows ++ [newRow] }
+
+
+--
+-- LaTeX fragments
+--
+latexFragment :: Monad m => OrgParser m (F Blocks)
+latexFragment = try $ do
+ envName <- latexEnvStart
+ content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
+ return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ where
+ c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
+ , c
+ , "\\end{", e, "}\n"
+ ]
+
+latexEnd :: Monad m => String -> OrgParser m ()
+latexEnd envName = try $
+ () <$ skipSpaces
+ <* string ("\\end{" ++ envName ++ "}")
+ <* blankline
+
+
+--
+-- Footnote defintions
+--
+noteBlock :: PandocMonad m => OrgParser m (F Blocks)
+noteBlock = try $ do
+ ref <- noteMarker <* skipSpaces
+ content <- mconcat <$> blocksTillHeaderOrNote
+ addToNotesTable (ref, content)
+ return mempty
+ where
+ blocksTillHeaderOrNote =
+ many1Till block (eof <|> () <$ lookAhead noteMarker
+ <|> () <$ lookAhead headerStart)
+
+-- Paragraphs or Plain text
+paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
+paraOrPlain = try $ do
+ -- Make sure we are not looking at a headline
+ notFollowedBy' (char '*' *> (oneOf " *"))
+ ils <- inlines
+ nl <- option False (newline *> return True)
+ -- Read block as paragraph, except if we are in a list context and the block
+ -- is directly followed by a list item, in which case the block is read as
+ -- plain text.
+ try (guard nl
+ *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
+ *> return (B.para <$> ils))
+ <|> (return (B.plain <$> ils))
+
+
+--
+-- list blocks
+--
+
+list :: PandocMonad m => OrgParser m (F Blocks)
+list = choice [ definitionList, bulletList, orderedList ] <?> "list"
+
+definitionList :: PandocMonad m => OrgParser m (F Blocks)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.definitionList . fmap compactifyDL . sequence
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
+
+bulletList :: PandocMonad m => OrgParser m (F Blocks)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.bulletList . fmap compactify . sequence
+ <$> many1 (listItem (bulletListStart' $ Just n))
+
+orderedList :: PandocMonad m => OrgParser m (F Blocks)
+orderedList = fmap B.orderedList . fmap compactify . sequence
+ <$> many1 (listItem orderedListStart)
+
+bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- length <$> many spaceChar
+ oneOf (bullets $ ind == 0)
+ skipSpaces1
+ return (ind + 1)
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ oneOf (bullets $ n == 1)
+ many1 spaceChar
+ return n
+
+-- Unindented lists are legal, but they can't use '*' bullets.
+-- We return n to maintain compatibility with the generic listItem.
+bullets :: Bool -> String
+bullets unindented = if unindented then "+-" else "*+-"
+
+definitionListItem :: PandocMonad m
+ => OrgParser m Int
+ -> OrgParser m (F (Inlines, [Blocks]))
+definitionListItem parseMarkerGetLength = try $ do
+ markerLength <- parseMarkerGetLength
+ term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ line1 <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ cont <- concat <$> many (listContinuation markerLength)
+ term' <- parseFromString inlines term
+ contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+ definitionMarker =
+ spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
+
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: PandocMonad m
+ => OrgParser m Int
+ -> OrgParser m (F Blocks)
+listItem start = try . withContext ListItemState $ do
+ markerLength <- try start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ rest <- concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Monad m => Int
+ -> OrgParser m String
+listContinuation markerLength = try $
+ notFollowedBy' blankline
+ *> (mappend <$> (concat <$> many1 listLine)
+ <*> many blankline)
+ where
+ listLine = try $ indentWith markerLength *> anyLineNewline
+
+ -- indent by specified number of spaces (or equiv. tabs)
+ indentWith :: Monad m => Int -> OrgParser m String
+ indentWith num = do
+ tabStop <- getOption readerTabStop
+ if num < tabStop
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+-- | Parse any line, include the final newline in the output.
+anyLineNewline :: Monad m => OrgParser m String
+anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
new file mode 100644
index 000000000..391877c03
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -0,0 +1,172 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode export options.
+-}
+module Text.Pandoc.Readers.Org.ExportSettings
+ ( exportSettings
+ ) where
+
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
+import Control.Monad ( mzero, void )
+import Data.Char ( toLower )
+import Data.Maybe ( listToMaybe )
+
+-- | Read and handle space separated org-mode export settings.
+exportSettings :: Monad m => OrgParser m ()
+exportSettings = void $ sepBy spaces exportSetting
+
+-- | Setter function for export settings.
+type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
+
+-- | Read and process a single org-mode export option.
+exportSetting :: Monad m => OrgParser m ()
+exportSetting = choice
+ [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val })
+ , booleanSetting "'" (\val es -> es { exportSmartQuotes = val })
+ , booleanSetting "*" (\val es -> es { exportEmphasizedText = val })
+ , booleanSetting "-" (\val es -> es { exportSpecialStrings = val })
+ , ignoredSetting ":"
+ , ignoredSetting "<"
+ , ignoredSetting "\\n"
+ , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val })
+ , booleanSetting "author" (\val es -> es { exportWithAuthor = val })
+ , ignoredSetting "c"
+ -- org-mode allows the special value `comment` for creator, which we'll
+ -- interpret as true as it doesn't make sense in the context of Pandoc.
+ , booleanSetting "creator" (\val es -> es { exportWithCreator = val })
+ , complementableListSetting "d" (\val es -> es { exportDrawers = val })
+ , ignoredSetting "date"
+ , ignoredSetting "e"
+ , booleanSetting "email" (\val es -> es { exportWithEmail = val })
+ , ignoredSetting "f"
+ , integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
+ , ignoredSetting "inline"
+ , ignoredSetting "num"
+ , ignoredSetting "p"
+ , ignoredSetting "pri"
+ , ignoredSetting "prop"
+ , ignoredSetting "stat"
+ , ignoredSetting "tags"
+ , ignoredSetting "tasks"
+ , ignoredSetting "tex"
+ , ignoredSetting "timestamp"
+ , ignoredSetting "title"
+ , ignoredSetting "toc"
+ , booleanSetting "todo" (\val es -> es { exportWithTodoKeywords = val })
+ , ignoredSetting "|"
+ ] <?> "export setting"
+
+genericExportSetting :: Monad m
+ => OrgParser m a
+ -> String
+ -> ExportSettingSetter a
+ -> OrgParser m ()
+genericExportSetting optionParser settingIdentifier setter = try $ do
+ _ <- string settingIdentifier *> char ':'
+ value <- optionParser
+ updateState $ modifyExportSettings value
+ where
+ modifyExportSettings val st =
+ st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
+
+-- | A boolean option, either nil (False) or non-nil (True).
+booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
+booleanSetting = genericExportSetting elispBoolean
+
+-- | An integer-valued option.
+integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
+integerSetting = genericExportSetting parseInt
+ where
+ parseInt = try $
+ many1 digit >>= maybe mzero (return . fst) . listToMaybe . reads
+
+-- | Either the string "headline" or an elisp boolean and treated as an
+-- @ArchivedTreesOption@.
+archivedTreeSetting :: Monad m
+ => String
+ -> ExportSettingSetter ArchivedTreesOption
+ -> OrgParser m ()
+archivedTreeSetting =
+ genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean
+ where
+ archivedTreesHeadlineSetting = try $ do
+ _ <- string "headline"
+ lookAhead (newline <|> spaceChar)
+ return ArchivedTreesHeadlineOnly
+
+ archivedTreesBoolean = try $ do
+ exportBool <- elispBoolean
+ return $
+ if exportBool
+ then ArchivedTreesExport
+ else ArchivedTreesNoExport
+
+-- | A list or a complement list (i.e. a list starting with `not`).
+complementableListSetting :: Monad m
+ => String
+ -> ExportSettingSetter (Either [String] [String])
+ -> OrgParser m ()
+complementableListSetting = genericExportSetting $ choice
+ [ Left <$> complementStringList
+ , Right <$> stringList
+ , (\b -> if b then Left [] else Right []) <$> elispBoolean
+ ]
+ where
+ -- Read a plain list of strings.
+ stringList :: Monad m => OrgParser m [String]
+ stringList = try $
+ char '('
+ *> sepBy elispString spaces
+ <* char ')'
+
+ -- Read an emacs lisp list specifying a complement set.
+ complementStringList :: Monad m => OrgParser m [String]
+ complementStringList = try $
+ string "(not "
+ *> sepBy elispString spaces
+ <* char ')'
+
+ elispString :: Monad m => OrgParser m String
+ elispString = try $
+ char '"'
+ *> manyTill alphaNum (char '"')
+
+-- | Read but ignore the export setting.
+ignoredSetting :: Monad m => String -> OrgParser m ()
+ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+
+-- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
+-- interpreted as true.
+elispBoolean :: Monad m => OrgParser m Bool
+elispBoolean = try $ do
+ value <- many1 nonspaceChar
+ return $ case map toLower value of
+ "nil" -> False
+ "{}" -> False
+ "()" -> False
+ _ -> True
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
new file mode 100644
index 000000000..f3671641a
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -0,0 +1,880 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.Inlines
+ ( inline
+ , inlines
+ , addToNotesTable
+ , linkTarget
+ ) where
+
+import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker )
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+import Text.Pandoc.Readers.Org.Shared
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines )
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
+import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
+import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
+import Text.Pandoc.Class (PandocMonad)
+
+import Prelude hiding (sequence)
+import Control.Monad ( guard, mplus, mzero, when, void )
+import Control.Monad.Trans ( lift )
+import Data.Char ( isAlphaNum, isSpace )
+import Data.List ( intersperse )
+import Data.Maybe ( fromMaybe )
+import qualified Data.Map as M
+import Data.Monoid ( (<>) )
+import Data.Traversable (sequence)
+
+--
+-- Functions acting on the parser state
+--
+recordAnchorId :: PandocMonad m => String -> OrgParser m ()
+recordAnchorId i = updateState $ \s ->
+ s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
+pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m ()
+pushToInlineCharStack c = updateState $ \s ->
+ s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
+
+popInlineCharStack :: PandocMonad m => OrgParser m ()
+popInlineCharStack = updateState $ \s ->
+ s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
+
+surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char]
+surroundingEmphasisChar =
+ take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+
+startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m ()
+startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = Just maxNewlines }
+
+decEmphasisNewlinesCount :: PandocMonad m => OrgParser m ()
+decEmphasisNewlinesCount = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+
+newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool
+newlinesCountWithinLimits = do
+ st <- getState
+ return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
+
+resetEmphasisNewlines :: PandocMonad m => OrgParser m ()
+resetEmphasisNewlines = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = Nothing }
+
+addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m ()
+addToNotesTable note = do
+ oldnotes <- orgStateNotes' <$> getState
+ updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
+
+-- | Parse a single Org-mode inline element
+inline :: PandocMonad m => OrgParser m (F Inlines)
+inline =
+ choice [ whitespace
+ , linebreak
+ , cite
+ , footnote
+ , linkOrImage
+ , anchor
+ , inlineCodeBlock
+ , str
+ , endline
+ , emphasizedText
+ , code
+ , math
+ , displayMath
+ , verbatim
+ , subscript
+ , superscript
+ , inlineLaTeX
+ , exportSnippet
+ , smart
+ , symbol
+ ] <* (guard =<< newlinesCountWithinLimits)
+ <?> "inline"
+
+-- | Read the rest of the input as inlines.
+inlines :: PandocMonad m => OrgParser m (F Inlines)
+inlines = trimInlinesF . mconcat <$> many1 inline
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~"
+
+
+whitespace :: PandocMonad m => OrgParser m (F Inlines)
+whitespace = pure B.space <$ skipMany1 spaceChar
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ <?> "whitespace"
+
+linebreak :: PandocMonad m => OrgParser m (F Inlines)
+linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+
+str :: PandocMonad m => OrgParser m (F Inlines)
+str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+ <* updateLastStrPos
+
+-- | An endline character that can be treated as a space, not a structural
+-- break. This should reflect the values of the Emacs variable
+-- @org-element-pagaraph-separate@.
+endline :: PandocMonad m => OrgParser m (F Inlines)
+endline = try $ do
+ newline
+ notFollowedBy' endOfBlock
+ decEmphasisNewlinesCount
+ guard =<< newlinesCountWithinLimits
+ updateLastPreCharPos
+ return . return $ B.softbreak
+
+
+--
+-- Citations
+--
+
+-- The state of citations is a bit confusing due to the lack of an official
+-- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
+-- first to be implemented here and is almost identical to Markdown's citation
+-- syntax. The org-ref package is in wide use to handle citations, but the
+-- syntax is a bit limiting and not quite as simple to write. The
+-- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc
+-- sytax and Org-oriented enhancements contributed by Richard Lawrence and
+-- others. It's dubbed Berkeley syntax due the place of activity of its main
+-- contributors. All this should be consolidated once an official Org-mode
+-- citation syntax has emerged.
+
+cite :: PandocMonad m => OrgParser m (F Inlines)
+cite = try $ berkeleyCite <|> do
+ guardEnabled Ext_citations
+ (cs, raw) <- withRaw $ choice
+ [ pandocOrgCite
+ , orgRefCite
+ , berkeleyTextualCite
+ ]
+ return $ (flip B.cite (B.text raw)) <$> cs
+
+-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
+pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
+pandocOrgCite = try $
+ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
+
+orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
+orgRefCite = try $ choice
+ [ normalOrgRefCite
+ , fmap (:[]) <$> linkLikeOrgRefCite
+ ]
+
+normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation])
+normalOrgRefCite = try $ do
+ mode <- orgRefCiteMode
+ firstCitation <- orgRefCiteList mode
+ moreCitations <- many (try $ char ',' *> orgRefCiteList mode)
+ return . sequence $ firstCitation : moreCitations
+ where
+ -- | A list of org-ref style citation keys, parsed as citation of the given
+ -- citation mode.
+ orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation)
+ orgRefCiteList citeMode = try $ do
+ key <- orgRefCiteKey
+ returnF $ Citation
+ { citationId = key
+ , citationPrefix = mempty
+ , citationSuffix = mempty
+ , citationMode = citeMode
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
+-- develop and adjusted to Org-mode style by John MacFarlane and Richard
+-- Lawrence, respectively, both philosophers at UC Berkeley.
+berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
+berkeleyCite = try $ do
+ bcl <- berkeleyCitationList
+ return $ do
+ parens <- berkeleyCiteParens <$> bcl
+ prefix <- berkeleyCiteCommonPrefix <$> bcl
+ suffix <- berkeleyCiteCommonSuffix <$> bcl
+ citationList <- berkeleyCiteCitations <$> bcl
+ return $
+ if parens
+ then toCite
+ . maybe id (\p -> alterFirst (prependPrefix p)) prefix
+ . maybe id (\s -> alterLast (appendSuffix s)) suffix
+ $ citationList
+ else maybe mempty (<> " ") prefix
+ <> (toListOfCites $ map toInTextMode citationList)
+ <> maybe mempty (", " <>) suffix
+ where
+ toCite :: [Citation] -> Inlines
+ toCite cs = B.cite cs mempty
+
+ toListOfCites :: [Citation] -> Inlines
+ toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
+
+ toInTextMode :: Citation -> Citation
+ toInTextMode c = c { citationMode = AuthorInText }
+
+ alterFirst, alterLast :: (a -> a) -> [a] -> [a]
+ alterFirst _ [] = []
+ alterFirst f (c:cs) = (f c):cs
+ alterLast f = reverse . alterFirst f . reverse
+
+ prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
+ prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
+ appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
+
+data BerkeleyCitationList = BerkeleyCitationList
+ { berkeleyCiteParens :: Bool
+ , berkeleyCiteCommonPrefix :: Maybe Inlines
+ , berkeleyCiteCommonSuffix :: Maybe Inlines
+ , berkeleyCiteCitations :: [Citation]
+ }
+berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
+berkeleyCitationList = try $ do
+ char '['
+ parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
+ char ':'
+ skipSpaces
+ commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
+ citations <- citeList
+ commonSuffix <- optionMaybe (try $ citationListPart)
+ char ']'
+ return (BerkeleyCitationList parens
+ <$> sequence commonPrefix
+ <*> sequence commonSuffix
+ <*> citations)
+ where
+ citationListPart :: PandocMonad m => OrgParser m (F Inlines)
+ citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
+ notFollowedBy' citeKey
+ notFollowedBy (oneOf ";]")
+ inline
+
+berkeleyBareTag :: PandocMonad m => OrgParser m ()
+berkeleyBareTag = try $ void berkeleyBareTag'
+
+berkeleyParensTag :: PandocMonad m => OrgParser m ()
+berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag'
+
+berkeleyBareTag' :: PandocMonad m => OrgParser m ()
+berkeleyBareTag' = try $ void (string "cite")
+
+berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
+berkeleyTextualCite = try $ do
+ (suppressAuthor, key) <- citeKey
+ returnF . return $ Citation
+ { citationId = key
+ , citationPrefix = mempty
+ , citationSuffix = mempty
+ , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+-- The following is what a Berkeley-style bracketed textual citation parser
+-- would look like. However, as these citations are a subset of Pandoc's Org
+-- citation style, this isn't used.
+-- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
+-- berkeleyBracketedTextualCite = try . (fmap head) $
+-- enclosedByPair '[' ']' berkeleyTextualCite
+
+-- | Read a link-like org-ref style citation. The citation includes pre and
+-- post text. However, multiple citations are not possible due to limitations
+-- in the syntax.
+linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation)
+linkLikeOrgRefCite = try $ do
+ _ <- string "[["
+ mode <- orgRefCiteMode
+ key <- orgRefCiteKey
+ _ <- string "]["
+ pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::")
+ spc <- option False (True <$ spaceChar)
+ suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]")
+ return $ do
+ pre' <- pre
+ suf' <- suf
+ return Citation
+ { citationId = key
+ , citationPrefix = B.toList pre'
+ , citationSuffix = B.toList (if spc then B.space <> suf' else suf')
+ , citationMode = mode
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+-- | Read a citation key. The characters allowed in citation keys are taken
+-- from the `org-ref-cite-re` variable in `org-ref.el`.
+orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey = try . many1 . satisfy $ \c ->
+ isAlphaNum c || c `elem` ("-_:\\./"::String)
+
+-- | Supported citation types. Only a small subset of org-ref types is
+-- supported for now. TODO: rewrite this, use LaTeX reader as template.
+orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode
+orgRefCiteMode =
+ choice $ map (\(s, mode) -> mode <$ try (string s <* char ':'))
+ [ ("cite", AuthorInText)
+ , ("citep", NormalCitation)
+ , ("citep*", NormalCitation)
+ , ("citet", AuthorInText)
+ , ("citet*", AuthorInText)
+ , ("citeyear", SuppressAuthor)
+ ]
+
+citeList :: PandocMonad m => OrgParser m (F [Citation])
+citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: PandocMonad m => OrgParser m (F Citation)
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ where
+ prefix = trimInlinesF . mconcat <$>
+ manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+ suffix = try $ do
+ hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+ skipSpaces
+ rest <- trimInlinesF . mconcat <$>
+ many (notFollowedBy (oneOf ";]") *> inline)
+ return $ if hasSpace
+ then (B.space <>) <$> rest
+ else rest
+
+footnote :: PandocMonad m => OrgParser m (F Inlines)
+footnote = try $ inlineNote <|> referencedNote
+
+inlineNote :: PandocMonad m => OrgParser m (F Inlines)
+inlineNote = try $ do
+ string "[fn:"
+ ref <- many alphaNum
+ char ':'
+ note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+ when (not $ null ref) $
+ addToNotesTable ("fn:" ++ ref, note)
+ return $ B.note <$> note
+
+referencedNote :: PandocMonad m => OrgParser m (F Inlines)
+referencedNote = try $ do
+ ref <- noteMarker
+ return $ do
+ notes <- asksF orgStateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Just contents -> do
+ st <- askF
+ let contents' = runF contents st{ orgStateNotes' = [] }
+ return $ B.note contents'
+
+linkOrImage :: PandocMonad m => OrgParser m (F Inlines)
+linkOrImage = explicitOrImageLink
+ <|> selflinkOrImage
+ <|> angleLink
+ <|> plainLink
+ <?> "link or image"
+
+explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines)
+explicitOrImageLink = try $ do
+ char '['
+ srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
+ title <- enclosedRaw (char '[') (char ']')
+ title' <- parseFromString (mconcat <$> many inline) title
+ char ']'
+ return $ do
+ src <- srcF
+ case cleanLinkString title of
+ Just imgSrc | isImageFilename imgSrc ->
+ pure $ B.link src "" $ B.image imgSrc mempty mempty
+ _ ->
+ linkToInlinesF src =<< title'
+
+selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
+selflinkOrImage = try $ do
+ src <- char '[' *> linkTarget <* char ']'
+ return $ linkToInlinesF src (B.str src)
+
+plainLink :: PandocMonad m => OrgParser m (F Inlines)
+plainLink = try $ do
+ (orig, src) <- uri
+ returnF $ B.link src "" (B.str orig)
+
+angleLink :: PandocMonad m => OrgParser m (F Inlines)
+angleLink = try $ do
+ char '<'
+ link <- plainLink
+ char '>'
+ return link
+
+linkTarget :: PandocMonad m => OrgParser m String
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
+
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
+applyCustomLinkFormat :: String -> OrgParser m (F String)
+applyCustomLinkFormat link = do
+ let (linkType, rest) = break (== ':') link
+ return $ do
+ formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
+ return $ maybe link ($ drop 1 rest) formatter
+
+-- | Take a link and return a function which produces new inlines when given
+-- description inlines.
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF linkStr =
+ case linkStr of
+ "" -> pure . B.link mempty "" -- wiki link (empty by convention)
+ ('#':_) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkString linkStr of
+ (Just cleanedLink) -> if isImageFilename cleanedLink
+ then const . pure $ B.image cleanedLink "" ""
+ else pure . B.link cleanedLink ""
+ Nothing -> internalLink linkStr -- other internal link
+
+internalLink :: String -> Inlines -> F Inlines
+internalLink link title = do
+ anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
+ if anchorB
+ then return $ B.link ('#':link) "" title
+ else return $ B.emph title
+
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal. Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: PandocMonad m => OrgParser m (F Inlines)
+anchor = try $ do
+ anchorId <- parseAnchor
+ recordAnchorId anchorId
+ returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ where
+ parseAnchor = string "<<"
+ *> many1 (noneOf "\t\n\r<>\"' ")
+ <* string ">>"
+ <* skipSpaces
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+ | isAlphaNum c = c
+ | c `elem` ("_.-:" :: String) = c
+ | otherwise = '-'
+
+-- | Parses an inline code block and marks it as an babel block.
+inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
+inlineCodeBlock = try $ do
+ string "src_"
+ lang <- many1 orgArgWordChar
+ opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+ inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
+ let attrClasses = [translateLang lang, rundocBlockClass]
+ let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
+ returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ where
+ inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
+ inlineBlockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgInlineParamValue
+ return (argKey, paramValue)
+
+ orgInlineParamValue :: PandocMonad m => OrgParser m String
+ orgInlineParamValue = try $
+ skipSpaces
+ *> notFollowedBy (char ':')
+ *> many1 (noneOf "\t\n\r ]")
+ <* skipSpaces
+
+
+emphasizedText :: PandocMonad m => OrgParser m (F Inlines)
+emphasizedText = do
+ state <- getState
+ guard . exportEmphasizedText . orgStateExportSettings $ state
+ try $ choice
+ [ emph
+ , strong
+ , strikeout
+ , underline
+ ]
+
+enclosedByPair :: PandocMonad m
+ => Char -- ^ opening char
+ -> Char -- ^ closing char
+ -> OrgParser m a -- ^ parser
+ -> OrgParser m [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
+
+emph :: PandocMonad m => OrgParser m (F Inlines)
+emph = fmap B.emph <$> emphasisBetween '/'
+
+strong :: PandocMonad m => OrgParser m (F Inlines)
+strong = fmap B.strong <$> emphasisBetween '*'
+
+strikeout :: PandocMonad m => OrgParser m (F Inlines)
+strikeout = fmap B.strikeout <$> emphasisBetween '+'
+
+-- There is no underline, so we use strong instead.
+underline :: PandocMonad m => OrgParser m (F Inlines)
+underline = fmap B.strong <$> emphasisBetween '_'
+
+verbatim :: PandocMonad m => OrgParser m (F Inlines)
+verbatim = return . B.code <$> verbatimBetween '='
+
+code :: PandocMonad m => OrgParser m (F Inlines)
+code = return . B.code <$> verbatimBetween '~'
+
+subscript :: PandocMonad m => OrgParser m (F Inlines)
+subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+
+superscript :: PandocMonad m => OrgParser m (F Inlines)
+superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+
+math :: PandocMonad m => OrgParser m (F Inlines)
+math = return . B.math <$> choice [ math1CharBetween '$'
+ , mathStringBetween '$'
+ , rawMathBetween "\\(" "\\)"
+ ]
+
+displayMath :: PandocMonad m => OrgParser m (F Inlines)
+displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+ , rawMathBetween "$$" "$$"
+ ]
+
+updatePositions :: PandocMonad m
+ => Char
+ -> OrgParser m Char
+updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
+
+symbol :: PandocMonad m => OrgParser m (F Inlines)
+symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+
+emphasisBetween :: PandocMonad m
+ => Char
+ -> OrgParser m (F Inlines)
+emphasisBetween c = try $ do
+ startEmphasisNewlinesCounting emphasisAllowedNewlines
+ res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
+ isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
+ when isTopLevelEmphasis
+ resetEmphasisNewlines
+ return res
+
+verbatimBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
+verbatimBetween c = try $
+ emphasisStart c *>
+ many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
+ where
+ verbatimChar = noneOf "\n\r" >>= updatePositions
+
+-- | Parses a raw string delimited by @c@ using Org's math rules
+mathStringBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
+mathStringBetween c = try $ do
+ mathStart c
+ body <- many1TillNOrLessNewlines mathAllowedNewlines
+ (noneOf (c:"\n\r"))
+ (lookAhead $ mathEnd c)
+ final <- mathEnd c
+ return $ body ++ [final]
+
+-- | Parse a single character between @c@ using math rules
+math1CharBetween :: PandocMonad m
+ => Char
+ -> OrgParser m String
+math1CharBetween c = try $ do
+ char c
+ res <- noneOf $ c:mathForbiddenBorderChars
+ char c
+ eof <|> () <$ lookAhead (oneOf mathPostChars)
+ return [res]
+
+rawMathBetween :: PandocMonad m
+ => String
+ -> String
+ -> OrgParser m String
+rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+
+-- | Parses the start (opening character) of emphasis
+emphasisStart :: PandocMonad m => Char -> OrgParser m Char
+emphasisStart c = try $ do
+ guard =<< afterEmphasisPreChar
+ guard =<< notAfterString
+ char c
+ lookAhead (noneOf emphasisForbiddenBorderChars)
+ pushToInlineCharStack c
+ -- nested inlines are allowed, so mark this position as one which might be
+ -- followed by another inline.
+ updateLastPreCharPos
+ return c
+
+-- | Parses the closing character of emphasis
+emphasisEnd :: PandocMonad m => Char -> OrgParser m Char
+emphasisEnd c = try $ do
+ guard =<< notAfterForbiddenBorderChar
+ char c
+ eof <|> () <$ lookAhead acceptablePostChars
+ updateLastStrPos
+ popInlineCharStack
+ return c
+ where acceptablePostChars =
+ surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
+
+mathStart :: PandocMonad m => Char -> OrgParser m Char
+mathStart c = try $
+ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
+
+mathEnd :: PandocMonad m => Char -> OrgParser m Char
+mathEnd c = try $ do
+ res <- noneOf (c:mathForbiddenBorderChars)
+ char c
+ eof <|> () <$ lookAhead (oneOf mathPostChars)
+ return res
+
+
+enclosedInlines :: PandocMonad m => OrgParser m a
+ -> OrgParser m b
+ -> OrgParser m (F Inlines)
+enclosedInlines start end = try $
+ trimInlinesF . mconcat <$> enclosed start end inline
+
+enclosedRaw :: PandocMonad m => OrgParser m a
+ -> OrgParser m b
+ -> OrgParser m String
+enclosedRaw start end = try $
+ start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ spanningTwoLines = try $
+ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
+-- newlines.
+many1TillNOrLessNewlines :: PandocMonad m => Int
+ -> OrgParser m Char
+ -> OrgParser m a
+ -> OrgParser m String
+many1TillNOrLessNewlines n p end = try $
+ nMoreLines (Just n) mempty >>= oneOrMore
+ where
+ nMoreLines Nothing cs = return cs
+ nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
+ nMoreLines k cs = try $ (final k cs <|> rest k cs)
+ >>= uncurry nMoreLines
+ final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
+ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
+ finalLine = try $ manyTill p end
+ minus1 k = k - 1
+ oneOrMore cs = guard (not $ null cs) *> return cs
+
+-- Org allows customization of the way it reads emphasis. We use the defaults
+-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
+-- for details).
+
+-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
+emphasisPreChars :: [Char]
+emphasisPreChars = "\t \"'({"
+
+-- | Chars allowed at after emphasis
+emphasisPostChars :: [Char]
+emphasisPostChars = "\t\n !\"'),-.:;?\\}"
+
+-- | Chars not allowed at the (inner) border of emphasis
+emphasisForbiddenBorderChars :: [Char]
+emphasisForbiddenBorderChars = "\t\n\r \"',"
+
+-- | The maximum number of newlines within
+emphasisAllowedNewlines :: Int
+emphasisAllowedNewlines = 1
+
+-- LaTeX-style math: see `org-latex-regexps` for details
+
+-- | Chars allowed after an inline ($...$) math statement
+mathPostChars :: [Char]
+mathPostChars = "\t\n \"'),-.:;?"
+
+-- | Chars not allowed at the (inner) border of math
+mathForbiddenBorderChars :: [Char]
+mathForbiddenBorderChars = "\t\n\r ,;.$"
+
+-- | Maximum number of newlines in an inline math statement
+mathAllowedNewlines :: Int
+mathAllowedNewlines = 2
+
+-- | Whether we are right behind a char allowed before emphasis
+afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool
+afterEmphasisPreChar = do
+ pos <- getPosition
+ lastPrePos <- orgStateLastPreCharPos <$> getState
+ return . fromMaybe True $ (== pos) <$> lastPrePos
+
+-- | Whether the parser is right after a forbidden border char
+notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool
+notAfterForbiddenBorderChar = do
+ pos <- getPosition
+ lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
+ return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
+subOrSuperExpr = try $
+ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
+ , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
+ , simpleSubOrSuperString
+ ] >>= parseFromString (mconcat <$> many inline)
+ where enclosing (left, right) s = left : s ++ [right]
+
+simpleSubOrSuperString :: PandocMonad m => OrgParser m String
+simpleSubOrSuperString = try $ do
+ state <- getState
+ guard . exportSubSuperscripts . orgStateExportSettings $ state
+ choice [ string "*"
+ , mappend <$> option [] ((:[]) <$> oneOf "+-")
+ <*> many1 alphaNum
+ ]
+
+inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
+inlineLaTeX = try $ do
+ cmd <- inlineLaTeXCommand
+ ils <- (lift . lift) $ parseAsInlineLaTeX cmd
+ maybe mzero returnF $
+ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils
+ where
+ parseAsMath :: String -> Maybe Inlines
+ parseAsMath cs = B.fromList <$> texMathToPandoc cs
+
+ parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
+
+ parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
+ -- drop initial backslash and any trailing "{}"
+ where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+
+ state :: ParserState
+ state = def{ stateOptions = def{ readerExtensions =
+ enableExtension Ext_raw_tex (readerExtensions def) } }
+
+ texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
+
+maybeRight :: Either a b -> Maybe b
+maybeRight = either (const Nothing) Just
+
+inlineLaTeXCommand :: PandocMonad m => OrgParser m String
+inlineLaTeXCommand = try $ do
+ rest <- getInput
+ parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
+ case parsed of
+ Right (RawInline _ cs) -> do
+ -- drop any trailing whitespace, those are not be part of the command as
+ -- far as org mode is concerned.
+ let cmdNoSpc = dropWhileEnd isSpace cs
+ let len = length cmdNoSpc
+ count len anyChar
+ return cmdNoSpc
+ _ -> mzero
+
+-- Taken from Data.OldList.
+dropWhileEnd :: (a -> Bool) -> [a] -> [a]
+dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
+
+exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
+exportSnippet = try $ do
+ string "@@"
+ format <- many1Till (alphaNum <|> char '-') (char ':')
+ snippet <- manyTill anyChar (try $ string "@@")
+ returnF $ B.rawInline format snippet
+
+smart :: PandocMonad m => OrgParser m (F Inlines)
+smart = do
+ guardEnabled Ext_smart
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+ where
+ orgDash = do
+ guard =<< getExportSetting exportSpecialStrings
+ dash <* updatePositions '-'
+ orgEllipses = do
+ guard =<< getExportSetting exportSpecialStrings
+ ellipses <* updatePositions '.'
+ orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: PandocMonad m => OrgParser m (F Inlines)
+singleQuoted = try $ do
+ guard =<< getExportSetting exportSmartQuotes
+ singleQuoteStart
+ updatePositions '\''
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline (singleQuoteEnd <* updatePositions '\'')
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: PandocMonad m => OrgParser m (F Inlines)
+doubleQuoted = try $ do
+ guard =<< getExportSetting exportSmartQuotes
+ doubleQuoteStart
+ updatePositions '"'
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (fmap B.doubleQuoted . trimInlinesF $ contents))
+ <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
new file mode 100644
index 000000000..2f4e21248
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-
+Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Meta
+ Copyright : Copyright (C) 2014-2017 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode meta declarations.
+-}
+module Text.Pandoc.Readers.Org.Meta
+ ( metaExport
+ , metaKey
+ , metaLine
+ ) where
+
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
+import Text.Pandoc.Readers.Org.Inlines
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Blocks, Inlines )
+import Text.Pandoc.Class ( PandocMonad )
+import Text.Pandoc.Definition
+
+import Control.Monad ( mzero, void )
+import Data.Char ( toLower )
+import Data.List ( intersperse )
+import qualified Data.Map as M
+import Data.Monoid ( (<>) )
+import Network.HTTP ( urlEncode )
+
+-- | Returns the current meta, respecting export options.
+metaExport :: Monad m => OrgParser m (F Meta)
+metaExport = do
+ st <- getState
+ let settings = orgStateExportSettings st
+ return $ (if exportWithAuthor settings then id else removeMeta "author")
+ . (if exportWithCreator settings then id else removeMeta "creator")
+ . (if exportWithEmail settings then id else removeMeta "email")
+ <$> orgStateMeta st
+
+removeMeta :: String -> Meta -> Meta
+removeMeta key meta' =
+ let metaMap = unMeta meta'
+ in Meta $ M.delete key metaMap
+
+-- | Parse and handle a single line containing meta information
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLine :: PandocMonad m => OrgParser m Blocks
+metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
+
+declarationLine :: PandocMonad m => OrgParser m ()
+declarationLine = try $ do
+ key <- map toLower <$> metaKey
+ (key', value) <- metaValue key
+ updateState $ \st ->
+ let meta' = B.setMeta key' <$> value <*> pure nullMeta
+ in st { orgStateMeta = meta' <> orgStateMeta st }
+
+metaKey :: Monad m => OrgParser m String
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
+
+metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue))
+metaValue key =
+ let inclKey = "header-includes"
+ in case key of
+ "author" -> (key,) <$> metaInlinesCommaSeparated
+ "title" -> (key,) <$> metaInlines
+ "date" -> (key,) <$> metaInlines
+ "header-includes" -> (key,) <$> accumulatingList key metaInlines
+ "latex_header" -> (inclKey,) <$>
+ accumulatingList inclKey (metaExportSnippet "latex")
+ "latex_class" -> ("documentclass",) <$> metaString
+ -- Org-mode expects class options to contain the surrounding brackets,
+ -- pandoc does not.
+ "latex_class_options" -> ("classoption",) <$>
+ metaModifiedString (filter (`notElem` "[]"))
+ "html_head" -> (inclKey,) <$>
+ accumulatingList inclKey (metaExportSnippet "html")
+ _ -> (key,) <$> metaString
+
+metaInlines :: PandocMonad m => OrgParser m (F MetaValue)
+metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
+
+metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
+metaInlinesCommaSeparated = do
+ authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
+ newline
+ authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
+ let toMetaInlines = MetaInlines . B.toList
+ return $ MetaList . map toMetaInlines <$> sequence authors
+
+metaString :: Monad m => OrgParser m (F MetaValue)
+metaString = metaModifiedString id
+
+metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
+metaModifiedString f = return . MetaString . f <$> anyLine
+
+-- | Read an format specific meta definition
+metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
+metaExportSnippet format =
+ return . MetaInlines . B.toList . B.rawInline format <$> anyLine
+
+-- | Accumulate the result of the @parser@ in a list under @key@.
+accumulatingList :: Monad m => String
+ -> OrgParser m (F MetaValue)
+ -> OrgParser m (F MetaValue)
+accumulatingList key p = do
+ value <- p
+ meta' <- orgStateMeta <$> getState
+ return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value
+ where curList m = case lookupMeta key m of
+ Just (MetaList ms) -> ms
+ Just x -> [x]
+ _ -> []
+
+--
+-- export options
+--
+optionLine :: Monad m => OrgParser m ()
+optionLine = try $ do
+ key <- metaKey
+ case key of
+ "link" -> parseLinkFormat >>= uncurry addLinkFormat
+ "options" -> exportSettings
+ "todo" -> todoSequence >>= updateState . registerTodoSequence
+ "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
+ "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
+ _ -> mzero
+
+addLinkFormat :: Monad m => String
+ -> (String -> String)
+ -> OrgParser m ()
+addLinkFormat key formatter = updateState $ \s ->
+ let fs = orgStateLinkFormatters s
+ in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
+parseLinkFormat :: Monad m => OrgParser m ((String, String -> String))
+parseLinkFormat = try $ do
+ linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkSubst <- parseFormat
+ return (linkType, linkSubst)
+
+-- | An ad-hoc, single-argument-only implementation of a printf-style format
+-- parser.
+parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat = try $ do
+ replacePlain <|> replaceUrl <|> justAppend
+ where
+ -- inefficient, but who cares
+ replacePlain = try $ (\x -> concat . flip intersperse x)
+ <$> sequence [tillSpecifier 's', rest]
+ replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ <$> sequence [tillSpecifier 'h', rest]
+ justAppend = try $ (++) <$> rest
+
+ rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+
+inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
+inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+
+--
+-- ToDo Sequences and Keywords
+--
+todoSequence :: Monad m => OrgParser m TodoSequence
+todoSequence = try $ do
+ todoKws <- todoKeywords
+ doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+ newline
+ -- There must be at least one DONE keyword. The last TODO keyword is taken if
+ -- necessary.
+ case doneKws of
+ Just done -> return $ keywordsToSequence todoKws done
+ Nothing -> case reverse todoKws of
+ [] -> mzero -- no keywords present
+ (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
+
+ where
+ todoKeywords :: Monad m => OrgParser m [String]
+ todoKeywords = try $
+ let keyword = many1 nonspaceChar <* skipSpaces
+ endOfKeywords = todoDoneSep <|> void newline
+ in manyTill keyword (lookAhead endOfKeywords)
+
+ todoDoneSep :: Monad m => OrgParser m ()
+ todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
+
+ keywordsToSequence :: [String] -> [String] -> TodoSequence
+ keywordsToSequence todo done =
+ let todoMarkers = map (TodoMarker Todo) todo
+ doneMarkers = map (TodoMarker Done) done
+ in todoMarkers ++ doneMarkers
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
new file mode 100644
index 000000000..181dd1d5c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -0,0 +1,259 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Define the Org-mode parser state.
+-}
+module Text.Pandoc.Readers.Org.ParserState
+ ( OrgParserState (..)
+ , OrgParserLocal (..)
+ , OrgNoteRecord
+ , HasReaderOptions (..)
+ , HasQuoteContext (..)
+ , TodoMarker (..)
+ , TodoSequence
+ , TodoState (..)
+ , activeTodoMarkers
+ , registerTodoSequence
+ , F(..)
+ , askF
+ , asksF
+ , trimInlinesF
+ , runF
+ , returnF
+ , ExportSettings (..)
+ , ArchivedTreesOption (..)
+ , optionsToParserState
+ ) where
+
+import Control.Monad (liftM, liftM2)
+import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local)
+
+import Data.Default (Default(..))
+import qualified Data.Map as M
+import qualified Data.Set as Set
+
+import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
+import Text.Pandoc.Definition ( Meta(..), nullMeta )
+import Text.Pandoc.Options ( ReaderOptions(..) )
+import Text.Pandoc.Parsing ( HasHeaderMap(..)
+ , HasIdentifierList(..)
+ , HasLastStrPosition(..)
+ , HasQuoteContext(..)
+ , HasReaderOptions(..)
+ , ParserContext(..)
+ , QuoteContext(..)
+ , SourcePos )
+
+-- | An inline note / footnote containing the note key and its (inline) value.
+type OrgNoteRecord = (String, F Blocks)
+-- | Table of footnotes
+type OrgNoteTable = [OrgNoteRecord]
+-- | Map of functions for link transformations. The map key is refers to the
+-- link-type, the corresponding function transforms the given link string.
+type OrgLinkFormatters = M.Map String (String -> String)
+
+-- | The states in which a todo item can be
+data TodoState = Todo | Done
+ deriving (Eq, Ord, Show)
+
+-- | A ToDo keyword like @TODO@ or @DONE@.
+data TodoMarker = TodoMarker
+ { todoMarkerState :: TodoState
+ , todoMarkerName :: String
+ }
+ deriving (Show, Eq)
+
+-- | Collection of todo markers in the order in which items should progress
+type TodoSequence = [TodoMarker]
+
+-- | Org-mode parser state
+data OrgParserState = OrgParserState
+ { orgStateAnchorIds :: [String]
+ , orgStateEmphasisCharStack :: [Char]
+ , orgStateEmphasisNewlines :: Maybe Int
+ , orgStateExportSettings :: ExportSettings
+ , orgStateHeaderMap :: M.Map Inlines String
+ , orgStateIdentifiers :: Set.Set String
+ , orgStateLastForbiddenCharPos :: Maybe SourcePos
+ , orgStateLastPreCharPos :: Maybe SourcePos
+ , orgStateLastStrPos :: Maybe SourcePos
+ , orgStateLinkFormatters :: OrgLinkFormatters
+ , orgStateMeta :: F Meta
+ , orgStateNotes' :: OrgNoteTable
+ , orgStateOptions :: ReaderOptions
+ , orgStateParserContext :: ParserContext
+ , orgStateTodoSequences :: [TodoSequence]
+ }
+
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
+
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote
+
+instance HasReaderOptions OrgParserState where
+ extractReaderOptions = orgStateOptions
+
+instance HasLastStrPosition OrgParserState where
+ getLastStrPos = orgStateLastStrPos
+ setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
+instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
+instance Default OrgParserState where
+ def = defaultOrgParserState
+
+defaultOrgParserState :: OrgParserState
+defaultOrgParserState = OrgParserState
+ { orgStateAnchorIds = []
+ , orgStateEmphasisCharStack = []
+ , orgStateEmphasisNewlines = Nothing
+ , orgStateExportSettings = def
+ , orgStateHeaderMap = M.empty
+ , orgStateIdentifiers = Set.empty
+ , orgStateLastForbiddenCharPos = Nothing
+ , orgStateLastPreCharPos = Nothing
+ , orgStateLastStrPos = Nothing
+ , orgStateLinkFormatters = M.empty
+ , orgStateMeta = return nullMeta
+ , orgStateNotes' = []
+ , orgStateOptions = def
+ , orgStateParserContext = NullState
+ , orgStateTodoSequences = []
+ }
+
+optionsToParserState :: ReaderOptions -> OrgParserState
+optionsToParserState opts =
+ def { orgStateOptions = opts }
+
+registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState
+registerTodoSequence todoSeq st =
+ let curSeqs = orgStateTodoSequences st
+ in st{ orgStateTodoSequences = todoSeq : curSeqs }
+
+-- | Get the current todo/done sequences. If no custom todo sequences have been
+-- defined, return a list containing just the default todo/done sequence.
+activeTodoSequences :: OrgParserState -> [TodoSequence]
+activeTodoSequences st =
+ let curSeqs = orgStateTodoSequences st
+ in if null curSeqs
+ then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]]
+ else curSeqs
+
+activeTodoMarkers :: OrgParserState -> TodoSequence
+activeTodoMarkers = concat . activeTodoSequences
+
+
+--
+-- Export Settings
+--
+
+-- | Options for the way archived trees are handled.
+data ArchivedTreesOption =
+ ArchivedTreesExport -- ^ Export the complete tree
+ | ArchivedTreesNoExport -- ^ Exclude archived trees from exporting
+ | ArchivedTreesHeadlineOnly -- ^ Export only the headline, discard the contents
+
+-- | Export settings <http://orgmode.org/manual/Export-settings.html>
+-- These settings can be changed via OPTIONS statements.
+data ExportSettings = ExportSettings
+ { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
+ , exportDrawers :: Either [String] [String]
+ -- ^ Specify drawer names which should be exported. @Left@ names are
+ -- explicitly excluded from the resulting output while @Right@ means that
+ -- only the listed drawer names should be included.
+ , exportEmphasizedText :: Bool -- ^ Parse emphasized text
+ , exportHeadlineLevels :: Int
+ -- ^ Maximum depth of headlines, deeper headlines are convert to list
+ , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
+ , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
+ , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
+ , exportWithAuthor :: Bool -- ^ Include author in final meta-data
+ , exportWithCreator :: Bool -- ^ Include creator in final meta-data
+ , exportWithEmail :: Bool -- ^ Include email in final meta-data
+ , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
+ }
+
+instance Default ExportSettings where
+ def = defaultExportSettings
+
+defaultExportSettings :: ExportSettings
+defaultExportSettings = ExportSettings
+ { exportArchivedTrees = ArchivedTreesHeadlineOnly
+ , exportDrawers = Left ["LOGBOOK"]
+ , exportEmphasizedText = True
+ , exportHeadlineLevels = 3
+ , exportSmartQuotes = True
+ , exportSpecialStrings = True
+ , exportSubSuperscripts = True
+ , exportWithAuthor = True
+ , exportWithCreator = True
+ , exportWithEmail = True
+ , exportWithTodoKeywords = True
+ }
+
+
+--
+-- Parser state reader
+--
+
+-- | Reader monad wrapping the parser state. This is used to delay evaluation
+-- until all relevant information has been parsed and made available in the
+-- parser state. See also the newtype of the same name in
+-- Text.Pandoc.Parsing.
+newtype F a = F { unF :: Reader OrgParserState a
+ } deriving (Functor, Applicative, Monad)
+
+instance Monoid a => Monoid (F a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = fmap mconcat . sequence
+
+runF :: F a -> OrgParserState -> a
+runF = runReader . unF
+
+askF :: F OrgParserState
+askF = F ask
+
+asksF :: (OrgParserState -> a) -> F a
+asksF f = F $ asks f
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
+returnF :: Monad m => a -> m (F a)
+returnF = return . return
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
new file mode 100644
index 000000000..1eb8a3b00
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -0,0 +1,217 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Org-mode parsing utilities.
+
+Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
+functions are adapted to Org-mode specific functionality.
+-}
+module Text.Pandoc.Readers.Org.Parsing
+ ( OrgParser
+ , anyLine
+ , blanklines
+ , newline
+ , parseFromString
+ , skipSpaces1
+ , inList
+ , withContext
+ , getExportSetting
+ , updateLastForbiddenCharPos
+ , updateLastPreCharPos
+ , orgArgKey
+ , orgArgWord
+ , orgArgWordChar
+ -- * Re-exports from Text.Pandoc.Parser
+ , ParserContext (..)
+ , many1Till
+ , notFollowedBy'
+ , spaceChar
+ , nonspaceChar
+ , skipSpaces
+ , blankline
+ , enclosed
+ , stringAnyCase
+ , charsInBalanced
+ , uri
+ , withRaw
+ , readWithM
+ , guardEnabled
+ , updateLastStrPos
+ , notAfterString
+ , ParserState (..)
+ , registerHeader
+ , QuoteContext (..)
+ , singleQuoteStart
+ , singleQuoteEnd
+ , doubleQuoteStart
+ , doubleQuoteEnd
+ , dash
+ , ellipses
+ , citeKey
+ -- * Re-exports from Text.Pandoc.Parsec
+ , runParser
+ , runParserT
+ , getInput
+ , char
+ , letter
+ , digit
+ , alphaNum
+ , skipMany1
+ , spaces
+ , anyChar
+ , satisfy
+ , string
+ , count
+ , eof
+ , noneOf
+ , oneOf
+ , lookAhead
+ , notFollowedBy
+ , many
+ , many1
+ , manyTill
+ , (<|>)
+ , (<?>)
+ , choice
+ , try
+ , sepBy
+ , sepBy1
+ , sepEndBy1
+ , option
+ , optional
+ , optionMaybe
+ , getState
+ , updateState
+ , SourcePos
+ , getPosition
+ ) where
+
+import Text.Pandoc.Readers.Org.ParserState
+
+import qualified Text.Pandoc.Parsing as P
+import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
+ , parseFromString )
+
+import Control.Monad ( guard )
+import Control.Monad.Reader ( ReaderT )
+
+-- | The parser used to read org files.
+type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+-- | Parse any line of text
+anyLine :: Monad m => OrgParser m String
+anyLine =
+ P.anyLine
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
+parseFromString parser str' = do
+ oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+ updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+ result <- P.parseFromString parser str'
+ updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ return result
+
+-- | Skip one or more tab or space characters.
+skipSpaces1 :: Monad m => OrgParser m ()
+skipSpaces1 = skipMany1 spaceChar
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
+newline :: Monad m => OrgParser m Char
+newline =
+ P.newline
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: Monad m => OrgParser m [Char]
+blanklines =
+ P.blanklines
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Succeeds when we're in list context.
+inList :: Monad m => OrgParser m ()
+inList = do
+ ctx <- orgStateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: Monad m
+ => ParserContext -- ^ New parser context
+ -> OrgParser m a -- ^ Parser to run in that context
+ -> OrgParser m a
+withContext context parser = do
+ oldContext <- orgStateParserContext <$> getState
+ updateState $ \s -> s{ orgStateParserContext = context }
+ result <- parser
+ updateState $ \s -> s{ orgStateParserContext = oldContext }
+ return result
+
+--
+-- Parser state functions
+--
+
+-- | Get an export setting.
+getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
+getExportSetting s = s . orgStateExportSettings <$> getState
+
+-- | Set the current position as the last position at which a forbidden char
+-- was found (i.e. a character which is not allowed at the inner border of
+-- markup).
+updateLastForbiddenCharPos :: Monad m => OrgParser m ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
+
+-- | Set the current parser position as the position at which a character was
+-- seen which allows inline markup to follow.
+updateLastPreCharPos :: Monad m => OrgParser m ()
+updateLastPreCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+
+--
+-- Org key-value parsing
+--
+
+-- | Read the key of a plist style key-value list.
+orgArgKey :: Monad m => OrgParser m String
+orgArgKey = try $
+ skipSpaces *> char ':'
+ *> many1 orgArgWordChar
+
+-- | Read the value of a plist style key-value list.
+orgArgWord :: Monad m => OrgParser m String
+orgArgWord = many1 orgArgWordChar
+
+-- | Chars treated as part of a word in plists.
+orgArgWordChar :: Monad m => OrgParser m Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
new file mode 100644
index 000000000..8c87cfa25
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -0,0 +1,97 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Utility functions used in other Pandoc Org modules.
+-}
+module Text.Pandoc.Readers.Org.Shared
+ ( cleanLinkString
+ , isImageFilename
+ , rundocBlockClass
+ , toRundocAttrib
+ , translateLang
+ ) where
+
+import Control.Arrow ( first )
+import Data.Char ( isAlphaNum )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+
+-- | Check whether the given string looks like the path to of URL of an image.
+isImageFilename :: String -> Bool
+isImageFilename filename =
+ any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
+ (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
+ ':' `notElem` filename)
+ where
+ imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+ protocols = [ "file", "http", "https" ]
+
+-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+ case s of
+ '/':_ -> Just $ "file://" ++ s -- absolute path
+ '.':'/':_ -> Just s -- relative path
+ '.':'.':'/':_ -> Just s -- relative path
+ -- Relative path or URL (file schema)
+ 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+ _ | isUrl s -> Just s -- URL
+ _ -> Nothing
+ where
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+-- | Prefix the name of a attribute, marking it as a code execution parameter.
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first (rundocPrefix ++)
+
+-- | Translate from Org-mode's programming language identifiers to those used
+-- by Pandoc. This is useful to allow for proper syntax highlighting in
+-- Pandoc output.
+translateLang :: String -> String
+translateLang cs =
+ case cs of
+ "C" -> "c"
+ "C++" -> "cpp"
+ "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported
+ "js" -> "javascript"
+ "lisp" -> "commonlisp"
+ "R" -> "r"
+ "sh" -> "bash"
+ "sqlite" -> "sql"
+ _ -> cs
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..441c573d9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,1354 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.RST
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.RST ( readRST ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder (setMeta, fromList)
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Error
+import Control.Monad ( when, liftM, guard, mzero )
+import Data.List ( findIndex, intercalate, isInfixOf,
+ transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
+import Data.Maybe (fromMaybe, isJust)
+import qualified Data.Map as M
+import Text.Printf ( printf )
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Data.Sequence (viewr, ViewR(..))
+import Data.Char (toLower, isHexDigit, isSpace, toUpper)
+import Data.Monoid ((<>))
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
+
+-- TODO:
+-- [ ] .. parsed-literal
+-- [ ] :widths: attribute in .. table
+-- [ ] .. csv-table
+-- [ ] .. list-table
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readRST opts s = do
+ parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+type RSTParser m = ParserT [Char] ParserState m
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers :: [Char]
+bulletListMarkers = "*+-"
+
+underlineChars :: [Char]
+underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221"
+
+--
+-- parsing documents
+--
+
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _ _) = x == n
+isHeader _ _ = False
+
+-- | Promote all headers in a list of blocks. (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level attr text):rest) =
+ (Header (level - num) attr text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders _ [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers. Also process a definition list right
+-- after the title block as metadata.
+titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
+ -> ([Block], Meta) -- ^ modified list of blocks, metadata
+titleTransform (bs, meta) =
+ let (bs', meta') =
+ case bs of
+ ((Header 1 _ head1):(Header 2 _ head2):rest)
+ | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
+ (promoteHeaders 2 rest, setMeta "title" (fromList head1) $
+ setMeta "subtitle" (fromList head2) meta)
+ ((Header 1 _ head1):rest)
+ | not (any (isHeader 1) rest) -> -- title only
+ (promoteHeaders 1 rest,
+ setMeta "title" (fromList head1) meta)
+ _ -> (bs, meta)
+ in case bs' of
+ (DefinitionList ds : rest) ->
+ (rest, metaFromDefList ds meta')
+ _ -> (bs', meta')
+
+metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
+metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
+ where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
+ adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
+ $ M.adjust toPlain "date"
+ $ M.adjust toPlain "title"
+ $ M.mapKeys (\k -> if k == "authors" then "author" else k)
+ $ metamap
+ toPlain (MetaBlocks [Para xs]) = MetaInlines xs
+ toPlain x = x
+ splitAuthors (MetaBlocks [Para xs])
+ = MetaList $ map MetaInlines
+ $ splitAuthors' xs
+ splitAuthors x = x
+ splitAuthors' = map normalizeSpaces .
+ splitOnSemi . concatMap factorSemi
+ splitOnSemi = splitBy (==Str ";")
+ factorSemi (Str []) = []
+ factorSemi (Str s) = case break (==';') s of
+ (xs,[]) -> [Str xs]
+ (xs,';':ys) -> Str xs : Str ";" :
+ factorSemi (Str ys)
+ (xs,ys) -> Str xs :
+ factorSemi (Str ys)
+ factorSemi x = [x]
+
+parseRST :: PandocMonad m => RSTParser m Pandoc
+parseRST = do
+ optional blanklines -- skip blank lines at beginning of file
+ startPos <- getPosition
+ -- go through once just to get list of reference keys and notes
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- concat <$>
+ manyTill (referenceKey <|> noteBlock <|> lineClump) eof
+ setInput docMinusKeys
+ setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ -- now parse it for real...
+ blocks <- B.toList <$> parseBlocks
+ standalone <- getOption readerStandalone
+ state <- getState
+ let meta = stateMeta state
+ let (blocks', meta') = if standalone
+ then titleTransform (blocks, meta)
+ else (blocks, meta)
+ reportLogMessages
+ return $ Pandoc meta' blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: PandocMonad m => RSTParser m Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: PandocMonad m => RSTParser m Blocks
+block = choice [ codeBlock
+ , blockQuote
+ , fieldList
+ , include
+ , directive
+ , comment
+ , header
+ , hrule
+ , lineBlock -- must go before definitionList
+ , table
+ , list
+ , lhsCodeBlock
+ , para
+ , mempty <$ blanklines
+ ] <?> "block"
+
+--
+-- field list
+--
+
+rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
+rawFieldListItem minIndent = try $ do
+ indent <- length <$> many (char ' ')
+ guard $ indent >= minIndent
+ char ':'
+ name <- many1Till (noneOf "\n") (char ':')
+ (() <$ lookAhead newline) <|> skipMany1 spaceChar
+ first <- anyLine
+ rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
+ indentedBlock
+ let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
+ return (name, raw)
+
+fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
+fieldListItem minIndent = try $ do
+ (name, raw) <- rawFieldListItem minIndent
+ term <- parseInlineFromString name
+ contents <- parseFromString parseBlocks raw
+ optional blanklines
+ return (term, [contents])
+
+fieldList :: PandocMonad m => RSTParser m Blocks
+fieldList = try $ do
+ indent <- length <$> lookAhead (many spaceChar)
+ items <- many1 $ fieldListItem indent
+ case items of
+ [] -> return mempty
+ items' -> return $ B.definitionList items'
+
+--
+-- line block
+--
+
+lineBlock :: PandocMonad m => RSTParser m Blocks
+lineBlock = try $ do
+ lines' <- lineBlockLines
+ lines'' <- mapM parseInlineFromString lines'
+ return $ B.lineBlock lines''
+
+lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
+lineBlockDirective body = do
+ lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
+ return $ B.lineBlock lines'
+
+--
+-- paragraph block
+--
+
+-- note: paragraph can end in a :: starting a code block
+para :: PandocMonad m => RSTParser m Blocks
+para = try $ do
+ result <- trimInlines . mconcat <$> many1 inline
+ option (B.plain result) $ try $ do
+ newline
+ blanklines
+ case viewr (B.unMany result) of
+ ys :> (Str xs) | "::" `isSuffixOf` xs -> do
+ raw <- option mempty codeBlockBody
+ return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
+ <> raw
+ _ -> return (B.para result)
+
+plain :: PandocMonad m => RSTParser m Blocks
+plain = B.plain . trimInlines . mconcat <$> many1 inline
+
+--
+-- header blocks
+--
+
+header :: PandocMonad m => RSTParser m Blocks
+header = doubleHeader <|> singleHeader <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader :: PandocMonad m => RSTParser m Blocks
+doubleHeader = try $ do
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else return ()
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ attr <- registerHeader nullAttr txt
+ return $ B.headerWith attr level txt
+
+-- a header with line on the bottom only
+singleHeader :: PandocMonad m => RSTParser m Blocks
+singleHeader = try $ do
+ notFollowedBy' whitespace
+ txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ attr <- registerHeader nullAttr txt
+ return $ B.headerWith attr level txt
+
+--
+-- hrule block
+--
+
+hrule :: Monad m => ParserT [Char] st m Blocks
+hrule = try $ do
+ chr <- oneOf underlineChars
+ count 3 (char chr)
+ skipMany (char chr)
+ blankline
+ blanklines
+ return B.horizontalRule
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine :: Monad m => String -> ParserT [Char] st m [Char]
+indentedLine indents = try $ do
+ string indents
+ anyLine
+
+-- one or more indented lines, possibly separated by blank lines.
+-- any amount of indentation will work.
+indentedBlock :: Monad m => ParserT [Char] st m [Char]
+indentedBlock = try $ do
+ indents <- lookAhead $ many1 spaceChar
+ lns <- many1 $ try $ do b <- option "" blanklines
+ l <- indentedLine indents
+ return (b ++ l)
+ optional blanklines
+ return $ unlines lns
+
+quotedBlock :: Monad m => ParserT [Char] st m [Char]
+quotedBlock = try $ do
+ quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+ lns <- many1 $ lookAhead (char quote) >> anyLine
+ optional blanklines
+ return $ unlines lns
+
+codeBlockStart :: Monad m => ParserT [Char] st m Char
+codeBlockStart = string "::" >> blankline >> blankline
+
+codeBlock :: Monad m => ParserT [Char] st m Blocks
+codeBlock = try $ codeBlockStart >> codeBlockBody
+
+codeBlockBody :: Monad m => ParserT [Char] st m Blocks
+codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
+ (indentedBlock <|> quotedBlock)
+
+lhsCodeBlock :: Monad m => RSTParser m Blocks
+lhsCodeBlock = try $ do
+ getPosition >>= guard . (==1) . sourceColumn
+ guardEnabled Ext_literate_haskell
+ optional codeBlockStart
+ lns <- latexCodeBlock <|> birdCodeBlock
+ blanklines
+ return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
+ $ intercalate "\n" lns
+
+latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+latexCodeBlock = try $ do
+ try (latexBlockLine "\\begin{code}")
+ many1Till anyLine (try $ latexBlockLine "\\end{code}")
+ where
+ latexBlockLine s = skipMany spaceChar >> string s >> blankline
+
+birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+birdCodeBlock = filterSpace <$> many1 birdTrackLine
+ where filterSpace lns =
+ -- if (as is normal) there is always a space after >, drop it
+ if all (\ln -> null ln || take 1 ln == " ") lns
+ then map (drop 1) lns
+ else lns
+
+birdTrackLine :: Monad m => ParserT [Char] st m [Char]
+birdTrackLine = char '>' >> anyLine
+
+--
+-- block quotes
+--
+
+blockQuote :: PandocMonad m => RSTParser m Blocks
+blockQuote = do
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return $ B.blockQuote contents
+
+{-
+Unsupported options for include:
+tab-width
+encoding
+-}
+
+include :: PandocMonad m => RSTParser m Blocks
+include = try $ do
+ string ".. include::"
+ skipMany spaceChar
+ f <- trim <$> anyLine
+ fields <- many $ rawFieldListItem 3
+ -- options
+ let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
+ let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
+ guard $ not (null f)
+ oldPos <- getPosition
+ oldInput <- getInput
+ containers <- stateContainers <$> getState
+ when (f `elem` containers) $
+ throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ updateState $ \s -> s{ stateContainers = f : stateContainers s }
+ mbContents <- readFileFromDirs ["."] f
+ contentLines <- case mbContents of
+ Just s -> return $ lines s
+ Nothing -> do
+ logMessage $ CouldNotLoadIncludeFile f oldPos
+ return []
+ let numLines = length contentLines
+ let startLine' = case startLine of
+ Nothing -> 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let endLine' = case endLine of
+ Nothing -> numLines + 1
+ Just x | x >= 0 -> x
+ | otherwise -> numLines + x -- negative from end
+ let contentLines' = drop (startLine' - 1)
+ $ take (endLine' - 1)
+ $ contentLines
+ let contentLines'' = (case trim <$> lookup "end-before" fields of
+ Just patt -> takeWhile (not . (patt `isInfixOf`))
+ Nothing -> id) .
+ (case trim <$> lookup "start-after" fields of
+ Just patt -> drop 1 .
+ dropWhile (not . (patt `isInfixOf`))
+ Nothing -> id) $ contentLines'
+ let contents' = unlines contentLines''
+ case lookup "code" fields of
+ Just lang -> do
+ let numberLines = lookup "number-lines" fields
+ let classes = trimr lang : ["numberLines" | isJust numberLines] ++
+ maybe [] words (lookup "class" fields)
+ let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines
+ let ident = maybe "" trimr $ lookup "name" fields
+ let attribs = (ident, classes, kvs)
+ return $ B.codeBlockWith attribs contents'
+ Nothing -> case lookup "literal" fields of
+ Just _ -> return $ B.rawBlock "rst" contents'
+ Nothing -> do
+ setPosition $ newPos f 1 1
+ setInput contents'
+ bs <- optional blanklines >>
+ (mconcat <$> many block)
+ setInput oldInput
+ setPosition oldPos
+ updateState $ \s -> s{ stateContainers =
+ tail $ stateContainers s }
+ return bs
+
+
+--
+-- list blocks
+--
+
+list :: PandocMonad m => RSTParser m Blocks
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks])
+definitionListItem = try $ do
+ -- avoid capturing a directive or comment
+ notFollowedBy (try $ char '.' >> char '.')
+ term <- trimInlines . mconcat <$> many1Till inline endline
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n"
+ return (term, [contents])
+
+definitionList :: PandocMonad m => RSTParser m Blocks
+definitionList = B.definitionList <$> many1 definitionListItem
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart :: Monad m => ParserT [Char] st m Int
+bulletListStart = try $ do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ return $ length (marker:white)
+
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart :: Monad m => ListNumberStyle
+ -> ListNumberDelim
+ -> RSTParser m Int
+orderedListStart style delim = try $ do
+ (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
+ white <- many1 spaceChar
+ return $ markerLen + length white
+
+-- parse a line of a list item
+listLine :: Monad m => Int -> RSTParser m [Char]
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ line <- anyLine
+ return $ line ++ "\n"
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Monad m => Int -> RSTParser m [Char]
+indentWith num = do
+ tabStop <- getOption readerTabStop
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: Monad m => RSTParser m Int
+ -> RSTParser m (Int, [Char])
+rawListItem start = try $ do
+ markerLength <- start
+ firstLine <- anyLine
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Monad m => Int -> RSTParser m [Char]
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return $ blanks ++ concat result
+
+listItem :: PandocMonad m
+ => RSTParser m Int
+ -> RSTParser m Blocks
+listItem start = try $ do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ skipMany1 blankline <|> () <$ lookAhead start
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may itself contain block elements
+ parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n"
+ updateState (\st -> st {stateParserContext = oldContext})
+ return $ case B.toList parsed of
+ [Para xs] -> B.singleton $ Plain xs
+ [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys]
+ [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys]
+ [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys]
+ _ -> parsed
+
+orderedList :: PandocMonad m => RSTParser m Blocks
+orderedList = try $ do
+ (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
+ items <- many1 (listItem (orderedListStart style delim))
+ let items' = compactify items
+ return $ B.orderedListWith (start, style, delim) items'
+
+bulletList :: PandocMonad m => RSTParser m Blocks
+bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart)
+
+--
+-- directive (e.g. comment, container, compound-paragraph)
+--
+
+comment :: Monad m => RSTParser m Blocks
+comment = try $ do
+ string ".."
+ skipMany1 spaceChar <|> (() <$ lookAhead newline)
+ notFollowedBy' directiveLabel
+ manyTill anyChar blanklines
+ optional indentedBlock
+ return mempty
+
+directiveLabel :: Monad m => RSTParser m String
+directiveLabel = map toLower
+ <$> many1Till (letter <|> char '-') (try $ string "::")
+
+directive :: PandocMonad m => RSTParser m Blocks
+directive = try $ do
+ string ".."
+ directive'
+
+directive' :: PandocMonad m => RSTParser m Blocks
+directive' = do
+ skipMany1 spaceChar
+ label <- directiveLabel
+ skipMany spaceChar
+ top <- many $ satisfy (/='\n')
+ <|> try (char '\n' <*
+ notFollowedBy' (rawFieldListItem 3) <*
+ count 3 (char ' ') <*
+ notFollowedBy blankline)
+ newline
+ fields <- many $ rawFieldListItem 3
+ body <- option "" $ try $ blanklines >> indentedBlock
+ optional blanklines
+ let body' = body ++ "\n\n"
+ imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height")
+ where
+ classes = words $ maybe "" trim $ lookup cl fields
+ getAtt k = case lookup k fields of
+ Just v -> [(k, filter (not . isSpace) v)]
+ Nothing -> []
+ case label of
+ "table" -> tableDirective top fields body'
+ "line-block" -> lineBlockDirective body'
+ "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
+ "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
+ "container" -> parseFromString parseBlocks body'
+ "replace" -> B.para <$> -- consumed by substKey
+ parseInlineFromString (trim top)
+ "unicode" -> B.para <$> -- consumed by substKey
+ parseInlineFromString (trim $ unicodeTransform top)
+ "compound" -> parseFromString parseBlocks body'
+ "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
+ "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
+ "highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
+ "rubric" -> B.para . B.strong <$> parseInlineFromString top
+ _ | label `elem` ["attention","caution","danger","error","hint",
+ "important","note","tip","warning","admonition"] ->
+ do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
+ let lab = case label of
+ "admonition" -> mempty
+ (l:ls) -> B.divWith ("",["admonition-title"],[])
+ (B.para (B.str (toUpper l : ls)))
+ [] -> mempty
+ return $ B.divWith ("",[label],[]) (lab <> bod)
+ "sidebar" ->
+ do let subtit = maybe "" trim $ lookup "subtitle" fields
+ tit <- B.para . B.strong <$> parseInlineFromString
+ (trim top ++ if null subtit
+ then ""
+ else (": " ++ subtit))
+ bod <- parseFromString parseBlocks body'
+ return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
+ "topic" ->
+ do tit <- B.para . B.strong <$> parseInlineFromString top
+ bod <- parseFromString parseBlocks body'
+ return $ B.divWith ("",["topic"],[]) $ tit <> bod
+ "default-role" -> mempty <$ updateState (\s ->
+ s { stateRstDefaultRole =
+ case trim top of
+ "" -> stateRstDefaultRole def
+ role -> role })
+ x | x == "code" || x == "code-block" ->
+ codeblock (words $ fromMaybe [] $ lookup "class" fields)
+ (lookup "number-lines" fields) (trim top) body
+ "aafig" -> do
+ let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields)
+ return $ B.codeBlockWith attribs $ stripTrailingNewlines body
+ "math" -> return $ B.para $ mconcat $ map B.displayMath
+ $ toChunks $ top ++ "\n\n" ++ body
+ "figure" -> do
+ (caption, legend) <- parseFromString extractCaption body'
+ let src = escapeURI $ trim top
+ return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend
+ "image" -> do
+ let src = escapeURI $ trim top
+ let alt = B.str $ maybe "image" trim $ lookup "alt" fields
+ let attr = imgAttr "class"
+ return $ B.para
+ $ case lookup "target" fields of
+ Just t -> B.link (escapeURI $ trim t) ""
+ $ B.imageWith attr src "" alt
+ Nothing -> B.imageWith attr src "" alt
+ "class" -> do
+ let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ -- directive content or the first immediately following element
+ children <- case body of
+ "" -> block
+ _ -> parseFromString parseBlocks body'
+ return $ B.divWith attrs children
+ other -> do
+ pos <- getPosition
+ logMessage $ SkippedContent (".. " ++ other) pos
+ return mempty
+
+tableDirective :: PandocMonad m
+ => String -> [(String, String)] -> String -> RSTParser m Blocks
+tableDirective top _fields body = do
+ bs <- parseFromString parseBlocks body
+ case B.toList bs of
+ [Table _ aligns' widths' header' rows'] -> do
+ title <- parseFromString (trimInlines . mconcat <$> many inline) top
+ -- TODO widths
+ -- align is not applicable since we can't represent whole table align
+ return $ B.singleton $ Table (B.toList title)
+ aligns' widths' header' rows'
+ _ -> return mempty
+
+-- TODO:
+-- - Only supports :format: fields with a single format for :raw: roles,
+-- change Text.Pandoc.Definition.Format to fix
+addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks
+addNewRole roleString fields = do
+ pos <- getPosition
+ (role, parentRole) <- parseFromString inheritedRole roleString
+ customRoles <- stateRstCustomRoles <$> getState
+ let getBaseRole (r, f, a) roles =
+ case M.lookup r roles of
+ Just (r', f', a') -> getBaseRole (r', f', a') roles
+ Nothing -> (r, f, a)
+ (baseRole, baseFmt, baseAttr) =
+ getBaseRole (parentRole, Nothing, nullAttr) customRoles
+ fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
+ annotate :: [String] -> [String]
+ annotate = maybe id (:) $
+ if baseRole == "code"
+ then lookup "language" fields
+ else Nothing
+ attr = let (ident, classes, keyValues) = baseAttr
+ -- nub in case role name & language class are the same
+ in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+ -- warn about syntax we ignore
+ flip mapM_ fields $ \(key, _) -> case key of
+ "language" -> when (baseRole /= "code") $ logMessage $
+ SkippedContent ":language: [because parent of role is not :code:]"
+ pos
+ "format" -> when (baseRole /= "raw") $ logMessage $
+ SkippedContent ":format: [because parent of role is not :raw:]" pos
+ _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
+ when (parentRole == "raw" && countKeys "format" > 1) $
+ logMessage $ SkippedContent ":format: [after first in definition of role]"
+ pos
+ when (parentRole == "code" && countKeys "language" > 1) $
+ logMessage $ SkippedContent
+ ":language: [after first in definition of role]" pos
+
+ updateState $ \s -> s {
+ stateRstCustomRoles =
+ M.insert role (baseRole, fmt, attr) customRoles
+ }
+
+ return mempty
+ where
+ countKeys k = length . filter (== k) . map fst $ fields
+ inheritedRole =
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+
+
+-- Can contain character codes as decimal numbers or
+-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
+-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
+-- or text, which is used as-is. Comments start with ..
+unicodeTransform :: String -> String
+unicodeTransform t =
+ case t of
+ ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment
+ ('0':'x':xs) -> go "0x" xs
+ ('x':xs) -> go "x" xs
+ ('\\':'x':xs) -> go "\\x" xs
+ ('U':'+':xs) -> go "U+" xs
+ ('u':xs) -> go "u" xs
+ ('\\':'u':xs) -> go "\\u" xs
+ ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs)
+ -- drop semicolon
+ (\(c,s) -> c : unicodeTransform (drop 1 s))
+ $ extractUnicodeChar xs
+ (x:xs) -> x : unicodeTransform xs
+ [] -> []
+ where go pref zs = maybe (pref ++ unicodeTransform zs)
+ (\(c,s) -> c : unicodeTransform s)
+ $ extractUnicodeChar zs
+
+extractUnicodeChar :: String -> Maybe (Char, String)
+extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
+ where (ds,rest) = span isHexDigit s
+ mbc = safeRead ('\'':'\\':'x':ds ++ "'")
+
+extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
+extractCaption = do
+ capt <- trimInlines . mconcat <$> many inline
+ legend <- optional blanklines >> (mconcat <$> many block)
+ return (capt,legend)
+
+-- divide string by blanklines
+toChunks :: String -> [String]
+toChunks = dropWhile null
+ . map (trim . unlines)
+ . splitBy (all (`elem` (" \t" :: String))) . lines
+
+codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
+codeblock classes numberLines lang body =
+ return $ B.codeBlockWith attribs $ stripTrailingNewlines body
+ where attribs = ("", classes', kvs)
+ classes' = "sourceCode" : lang
+ : maybe [] (\_ -> ["numberLines"]) numberLines
+ ++ classes
+ kvs = case numberLines of
+ Just "" -> []
+ Nothing -> []
+ Just n -> [("startFrom",trim n)]
+
+---
+--- note block
+---
+
+noteBlock :: Monad m => RSTParser m [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ string ".."
+ spaceChar >> skipMany spaceChar
+ ref <- noteMarker
+ first <- (spaceChar >> skipMany spaceChar >> anyLine)
+ <|> (newline >> return "")
+ blanks <- option "" blanklines
+ rest <- option "" indentedBlock
+ endPos <- getPosition
+ let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
+ let newnote = (ref, raw)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+noteMarker :: Monad m => RSTParser m [Char]
+noteMarker = do
+ char '['
+ res <- many1 digit
+ <|> (try $ char '#' >> liftM ('#':) simpleReferenceName')
+ <|> count 1 (oneOf "#*")
+ char ']'
+ return res
+
+--
+-- reference key
+--
+
+quotedReferenceName :: PandocMonad m => RSTParser m Inlines
+quotedReferenceName = try $ do
+ char '`' >> notFollowedBy (char '`') -- `` means inline code!
+ label' <- trimInlines . mconcat <$> many1Till inline (char '`')
+ return label'
+
+unquotedReferenceName :: PandocMonad m => RSTParser m Inlines
+unquotedReferenceName = try $ do
+ label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':')
+ return label'
+
+-- Simple reference names are single words consisting of alphanumerics
+-- plus isolated (no two adjacent) internal hyphens, underscores,
+-- periods, colons and plus signs; no whitespace or other characters
+-- are allowed.
+simpleReferenceName' :: Monad m => ParserT [Char] st m String
+simpleReferenceName' = do
+ x <- alphaNum
+ xs <- many $ alphaNum
+ <|> (try $ oneOf "-_:+." <* lookAhead alphaNum)
+ return (x:xs)
+
+simpleReferenceName :: Monad m => ParserT [Char] st m Inlines
+simpleReferenceName = do
+ raw <- simpleReferenceName'
+ return $ B.str raw
+
+referenceName :: PandocMonad m => RSTParser m Inlines
+referenceName = quotedReferenceName <|>
+ (try $ simpleReferenceName <* lookAhead (char ':')) <|>
+ unquotedReferenceName
+
+referenceKey :: PandocMonad m => RSTParser m [Char]
+referenceKey = do
+ startPos <- getPosition
+ choice [substKey, anonymousKey, regularKey]
+ optional blanklines
+ endPos <- getPosition
+ -- return enough blanks to replace key
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+targetURI :: Monad m => ParserT [Char] st m [Char]
+targetURI = do
+ skipSpaces
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+ blanklines
+ return $ escapeURI $ trim $ contents
+
+substKey :: PandocMonad m => RSTParser m ()
+substKey = try $ do
+ string ".."
+ skipMany1 spaceChar
+ (alt,ref) <- withRaw $ trimInlines . mconcat
+ <$> enclosed (char '|') (char '|') inline
+ res <- B.toList <$> directive'
+ il <- case res of
+ -- use alt unless :alt: attribute on image:
+ [Para [Image attr [Str "image"] (src,tit)]] ->
+ return $ B.imageWith attr src tit alt
+ [Para [Link _ [Image attr [Str "image"] (src,tit)] (src',tit')]] ->
+ return $ B.link src' tit' (B.imageWith attr src tit alt)
+ [Para ils] -> return $ B.fromList ils
+ _ -> mzero
+ let key = toKey $ stripFirstAndLast ref
+ updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s }
+
+anonymousKey :: Monad m => RSTParser m ()
+anonymousKey = try $ do
+ oneOfStrings [".. __:", "__"]
+ src <- targetURI
+ pos <- getPosition
+ let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
+ --TODO: parse width, height, class and name attributes
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+
+stripTicks :: String -> String
+stripTicks = reverse . stripTick . reverse . stripTick
+ where stripTick ('`':xs) = xs
+ stripTick xs = xs
+
+regularKey :: PandocMonad m => RSTParser m ()
+regularKey = try $ do
+ string ".. _"
+ (_,ref) <- withRaw referenceName
+ char ':'
+ src <- targetURI
+ let key = toKey $ stripTicks ref
+ --TODO: parse width, height, class and name attributes
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
+
+--
+-- tables
+--
+
+-- General tables TODO:
+-- - figure out if leading spaces are acceptable and if so, add
+-- support for them
+--
+-- Simple tables TODO:
+-- - column spans
+-- - multiline support
+-- - ensure that rightmost column span does not need to reach end
+-- - require at least 2 columns
+--
+-- Grid tables TODO:
+-- - column spans
+
+dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many (char ' ')
+ return (length dashes, length $ dashes ++ sp)
+
+simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
+simpleDashedLines ch = try $ many1 (dashedLine ch)
+
+-- Parse a table row separator
+simpleTableSep :: Monad m => Char -> RSTParser m Char
+simpleTableSep ch = try $ simpleDashedLines ch >> newline
+
+-- Parse a table footer
+simpleTableFooter :: Monad m => RSTParser m [Char]
+simpleTableFooter = try $ simpleTableSep '=' >> blanklines
+
+-- Parse a raw line and split it into chunks by indices.
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLine indices = do
+ line <- many1Till anyChar newline
+ return (simpleTableSplitLine indices line)
+
+-- Parse a table row and return a list of blocks (columns).
+simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
+simpleTableRow indices = do
+ notFollowedBy' simpleTableFooter
+ firstLine <- simpleTableRawLine indices
+ colLines <- return [] -- TODO
+ let cols = map unlines . transpose $ firstLine : colLines
+ mapM (parseFromString (mconcat <$> many plain)) cols
+
+simpleTableSplitLine :: [Int] -> String -> [String]
+simpleTableSplitLine indices line =
+ map trim
+ $ tail $ splitByIndices (init indices) line
+
+simpleTableHeader :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m ([Blocks], [Alignment], [Int])
+simpleTableHeader headless = try $ do
+ optional blanklines
+ rawContent <- if headless
+ then return ""
+ else simpleTableSep '=' >> anyLine
+ dashes <- simpleDashedLines '=' <|> simpleDashedLines '-'
+ newline
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else simpleTableSplitLine indices rawContent
+ heads <- mapM (parseFromString (mconcat <$> many plain)) $
+ map trim rawHeads
+ return (heads, aligns, indices)
+
+-- Parse a simple table.
+simpleTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
+simpleTable headless = do
+ tbl <- tableWith (simpleTableHeader headless) simpleTableRow
+ sep simpleTableFooter
+ -- Simple tables get 0s for relative column widths (i.e., use default)
+ case B.toList tbl of
+ [Table c a _w h l] -> return $ B.singleton $
+ Table c a (replicate (length a) 0) h l
+ _ ->
+ throwError $ PandocShouldNeverHappenError
+ "tableWith returned something unexpected"
+ where
+ sep = return () -- optional (simpleTableSep '-')
+
+gridTable :: PandocMonad m
+ => Bool -- ^ Headerless table
+ -> RSTParser m Blocks
+gridTable headerless = gridTableWith parseBlocks headerless
+
+table :: PandocMonad m => RSTParser m Blocks
+table = gridTable False <|> simpleTable False <|>
+ gridTable True <|> simpleTable True <?> "table"
+
+--
+-- inline
+--
+
+inline :: PandocMonad m => RSTParser m Inlines
+inline = choice [ note -- can start with whitespace, so try before ws
+ , whitespace
+ , link
+ , str
+ , endline
+ , strong
+ , emph
+ , code
+ , subst
+ , interpretedRole
+ , smart
+ , hyphens
+ , escapedChar
+ , symbol ] <?> "inline"
+
+parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
+parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline)
+
+hyphens :: Monad m => RSTParser m Inlines
+hyphens = do
+ result <- many1 (char '-')
+ optional endline
+ -- don't want to treat endline after hyphen or dash as a space
+ return $ B.str result
+
+escapedChar :: Monad m => ParserT [Char] st m Inlines
+escapedChar = do c <- escaped anyChar
+ return $ if c == ' ' -- '\ ' is null in RST
+ then mempty
+ else B.str [c]
+
+symbol :: Monad m => RSTParser m Inlines
+symbol = do
+ result <- oneOf specialChars
+ return $ B.str [result]
+
+-- parses inline code, between codeStart and codeEnd
+code :: Monad m => RSTParser m Inlines
+code = try $ do
+ string "``"
+ result <- manyTill anyChar (try (string "``"))
+ return $ B.code
+ $ trim $ unwords $ lines result
+
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: Monad m => RSTParser m a -> RSTParser m a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ -- single quote start can't be right after str
+ guard $ stateLastStrPos st /= Just pos
+ p
+
+emph :: PandocMonad m => RSTParser m Inlines
+emph = B.emph . trimInlines . mconcat <$>
+ enclosed (atStart $ char '*') (char '*') inline
+
+strong :: PandocMonad m => RSTParser m Inlines
+strong = B.strong . trimInlines . mconcat <$>
+ enclosed (atStart $ string "**") (try $ string "**") inline
+
+-- Note, this doesn't precisely implement the complex rule in
+-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+-- but it should be good enough for most purposes
+--
+-- TODO:
+-- - Classes are silently discarded in addNewRole
+-- - Lacks sensible implementation for title-reference (which is the default)
+-- - Allows direct use of the :raw: role, rST only allows inherited use.
+interpretedRole :: PandocMonad m => RSTParser m Inlines
+interpretedRole = try $ do
+ (role, contents) <- roleBefore <|> roleAfter
+ renderRole contents Nothing role nullAttr
+
+renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
+renderRole contents fmt role attr = case role of
+ "sup" -> return $ B.superscript $ B.str contents
+ "superscript" -> return $ B.superscript $ B.str contents
+ "sub" -> return $ B.subscript $ B.str contents
+ "subscript" -> return $ B.subscript $ B.str contents
+ "emphasis" -> return $ B.emph $ B.str contents
+ "strong" -> return $ B.strong $ B.str contents
+ "rfc-reference" -> return $ rfcLink contents
+ "RFC" -> return $ rfcLink contents
+ "pep-reference" -> return $ pepLink contents
+ "PEP" -> return $ pepLink contents
+ "literal" -> return $ B.codeWith attr contents
+ "math" -> return $ B.math contents
+ "title-reference" -> titleRef contents
+ "title" -> titleRef contents
+ "t" -> titleRef contents
+ "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
+ "span" -> return $ B.spanWith attr $ B.str contents
+ "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
+ custom -> do
+ customRoles <- stateRstCustomRoles <$> getState
+ case M.lookup custom customRoles of
+ Just (newRole, newFmt, newAttr) ->
+ renderRole contents newFmt newRole newAttr
+ Nothing -> do
+ pos <- getPosition
+ logMessage $ SkippedContent (":" ++ custom ++ ":") pos
+ return $ B.str contents -- Undefined role
+ where
+ titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+ rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
+ where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
+ pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
+ where padNo = replicate (4 - length pepNo) '0' ++ pepNo
+ pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+
+addClass :: String -> Attr -> Attr
+addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+
+roleName :: PandocMonad m => RSTParser m String
+roleName = many1 (letter <|> char '-')
+
+roleMarker :: PandocMonad m => RSTParser m String
+roleMarker = char ':' *> roleName <* char ':'
+
+roleBefore :: PandocMonad m => RSTParser m (String,String)
+roleBefore = try $ do
+ role <- roleMarker
+ contents <- unmarkedInterpretedText
+ return (role,contents)
+
+roleAfter :: PandocMonad m => RSTParser m (String,String)
+roleAfter = try $ do
+ contents <- unmarkedInterpretedText
+ role <- roleMarker <|> (stateRstDefaultRole <$> getState)
+ return (role,contents)
+
+unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
+unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
+
+whitespace :: PandocMonad m => RSTParser m Inlines
+whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+
+str :: Monad m => RSTParser m Inlines
+str = do
+ let strChar = noneOf ("\t\n " ++ specialChars)
+ result <- many1 strChar
+ updateLastStrPos
+ return $ B.str result
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: Monad m => RSTParser m Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts at beginning of line differently in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState
+ then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ notFollowedBy' bulletListStart
+ else return ()
+ return B.softbreak
+
+--
+-- links
+--
+
+link :: PandocMonad m => RSTParser m Inlines
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
+
+explicitLink :: PandocMonad m => RSTParser m Inlines
+explicitLink = try $ do
+ char '`'
+ notFollowedBy (char '`') -- `` marks start of inline code
+ label' <- trimInlines . mconcat <$>
+ manyTill (notFollowedBy (char '`') >> inline) (char '<')
+ src <- trim <$> manyTill (noneOf ">\n") (char '>')
+ skipSpaces
+ string "`_"
+ optional $ char '_' -- anonymous form
+ let label'' = if label' == mempty
+ then B.str src
+ else label'
+ -- `link <google_>` is a reference link to _google!
+ ((src',tit),attr) <- case reverse src of
+ '_':xs -> lookupKey [] (toKey (reverse xs))
+ _ -> return ((src, ""), nullAttr)
+ return $ B.linkWith attr (escapeURI src') tit label''
+
+referenceLink :: PandocMonad m => RSTParser m Inlines
+referenceLink = try $ do
+ (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
+ char '_'
+ let isAnonKey (Key ('_':_)) = True
+ isAnonKey _ = False
+ state <- getState
+ let keyTable = stateKeys state
+ key <- option (toKey $ stripTicks ref) $
+ do char '_'
+ let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
+ case anonKeys of
+ [] -> mzero
+ (k:_) -> return k
+ ((src,tit), attr) <- lookupKey [] key
+ -- if anonymous link, remove key so it won't be used again
+ when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
+ return $ B.linkWith attr src tit label'
+
+-- We keep a list of oldkeys so we can detect lookup loops.
+lookupKey :: PandocMonad m
+ => [Key] -> Key -> RSTParser m ((String, String), Attr)
+lookupKey oldkeys key = do
+ pos <- getPosition
+ state <- getState
+ let keyTable = stateKeys state
+ case M.lookup key keyTable of
+ Nothing -> do
+ let Key key' = key
+ logMessage $ ReferenceNotFound key' pos
+ return (("",""),nullAttr)
+ -- check for keys of the form link_, which need to be resolved:
+ Just ((u@(_:_),""),_) | last u == '_' -> do
+ let rawkey = init u
+ let newkey = toKey rawkey
+ if newkey `elem` oldkeys
+ then do
+ logMessage $ CircularReference rawkey pos
+ return (("",""),nullAttr)
+ else lookupKey (key:oldkeys) newkey
+ Just val -> return val
+
+autoURI :: Monad m => RSTParser m Inlines
+autoURI = do
+ (orig, src) <- uri
+ return $ B.link src "" $ B.str orig
+
+autoEmail :: Monad m => RSTParser m Inlines
+autoEmail = do
+ (orig, src) <- emailAddress
+ return $ B.link src "" $ B.str orig
+
+autoLink :: PandocMonad m => RSTParser m Inlines
+autoLink = autoURI <|> autoEmail
+
+subst :: PandocMonad m => RSTParser m Inlines
+subst = try $ do
+ (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline
+ state <- getState
+ let substTable = stateSubstitutions state
+ let key = toKey $ stripFirstAndLast ref
+ case M.lookup key substTable of
+ Nothing -> do
+ pos <- getPosition
+ logMessage $ ReferenceNotFound (show key) pos
+ return mempty
+ Just target -> return target
+
+note :: PandocMonad m => RSTParser m Inlines
+note = try $ do
+ optional whitespace
+ ref <- noteMarker
+ char '_'
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> do
+ pos <- getPosition
+ logMessage $ ReferenceNotFound ref pos
+ return mempty
+ Just raw -> do
+ -- We temporarily empty the note list while parsing the note,
+ -- so that we don't get infinite loops with notes inside notes...
+ -- Note references inside other notes are allowed in reST, but
+ -- not yet in this implementation.
+ updateState $ \st -> st{ stateNotes = [] }
+ contents <- parseFromString parseBlocks raw
+ let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
+ -- delete the note so the next auto-numbered note
+ -- doesn't get the same contents:
+ then deleteFirstsBy (==) notes [(ref,raw)]
+ else notes
+ updateState $ \st -> st{ stateNotes = newnotes }
+ return $ B.note contents
+
+smart :: PandocMonad m => RSTParser m Inlines
+smart = do
+ guardEnabled Ext_smart
+ doubleQuoted <|> singleQuoted <|>
+ choice [apostrophe, dash, ellipses]
+
+singleQuoted :: PandocMonad m => RSTParser m Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ B.singleQuoted . trimInlines . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+doubleQuoted :: PandocMonad m => RSTParser m Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $
+ B.doubleQuoted . trimInlines . mconcat <$>
+ many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
new file mode 100644
index 000000000..3b89f2ee9
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -0,0 +1,525 @@
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.TWiki
+ Copyright : Copyright (C) 2014 Alexander Sulfrian
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ Stability : alpha
+ Portability : portable
+
+Conversion of twiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.TWiki ( readTWiki
+ ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Options
+import Text.Pandoc.Logging
+import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
+import Control.Monad
+import Text.Pandoc.XML (fromEntities)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Data.Char (isAlphaNum)
+import qualified Data.Foldable as F
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad, report)
+
+-- | Read twiki from an input string and return a Pandoc document.
+readTWiki :: PandocMonad m
+ => ReaderOptions
+ -> String
+ -> m Pandoc
+readTWiki opts s = do
+ res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n")
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type TWParser = ParserT [Char] ParserState
+
+--
+-- utility functions
+--
+
+tryMsg :: String -> TWParser m a -> TWParser m a
+tryMsg msg p = try p <?> msg
+
+skip :: TWParser m a -> TWParser m ()
+skip parser = parser >> return ()
+
+nested :: PandocMonad m => TWParser m a -> TWParser m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
+htmlElement tag = tryMsg tag $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = skip $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: PandocMonad m
+ => String -> TWParser m a -> TWParser m (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
+parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+
+--
+-- main parser
+--
+
+parseTWiki :: PandocMonad m => TWParser m Pandoc
+parseTWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => TWParser m B.Blocks
+block = do
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ return res
+
+blockElements :: PandocMonad m => TWParser m B.Blocks
+blockElements = choice [ separator
+ , header
+ , verbatim
+ , literal
+ , list ""
+ , table
+ , blockQuote
+ , noautolink
+ ]
+
+separator :: PandocMonad m => TWParser m B.Blocks
+separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
+
+header :: PandocMonad m => TWParser m B.Blocks
+header = tryMsg "header" $ do
+ string "---"
+ level <- many1 (char '+') >>= return . length
+ guard $ level <= 6
+ classes <- option [] $ string "!!" >> return ["unnumbered"]
+ skipSpaces
+ content <- B.trimInlines . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", classes, []) content
+ return $ B.headerWith attr level $ content
+
+verbatim :: PandocMonad m => TWParser m B.Blocks
+verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
+ >>= return . (uncurry B.codeBlockWith)
+
+literal :: PandocMonad m => TWParser m B.Blocks
+literal = htmlElement "literal" >>= return . rawBlock
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+list :: PandocMonad m => String -> TWParser m B.Blocks
+list prefix = choice [ bulletList prefix
+ , orderedList prefix
+ , definitionList prefix]
+
+definitionList :: PandocMonad m => String -> TWParser m B.Blocks
+definitionList prefix = tryMsg "definitionList" $ do
+ indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ return $ B.definitionList elements
+ where
+ parseDefinitionListItem :: PandocMonad m
+ => String -> TWParser m (B.Inlines, [B.Blocks])
+ parseDefinitionListItem indent = do
+ string (indent ++ "$ ") >> skipSpaces
+ term <- many1Till inline $ string ": "
+ line <- listItemLine indent $ string "$ "
+ return $ (mconcat term, [line])
+
+bulletList :: PandocMonad m => String -> TWParser m B.Blocks
+bulletList prefix = tryMsg "bulletList" $
+ parseList prefix (char '*') (char ' ')
+
+orderedList :: PandocMonad m => String -> TWParser m B.Blocks
+orderedList prefix = tryMsg "orderedList" $
+ parseList prefix (oneOf "1iIaA") (string ". ")
+
+parseList :: PandocMonad m
+ => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
+parseList prefix marker delim = do
+ (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ return $ case style of
+ '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
+ 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
+ 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
+ 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
+ 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
+ _ -> B.bulletList blocks
+ where
+ listStyle = do
+ indent <- many1 $ string " "
+ style <- marker
+ return (concat indent, style)
+
+parseListItem :: (PandocMonad m, Show a)
+ => String -> TWParser m a -> TWParser m B.Blocks
+parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+
+listItemLine :: (PandocMonad m, Show a)
+ => String -> TWParser m a -> TWParser m B.Blocks
+listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+ where
+ lineContent = do
+ content <- anyLine
+ continuation <- optionMaybe listContinuation
+ return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ filterSpaces = reverse . dropWhile (== ' ') . reverse
+ listContinuation = notFollowedBy (string prefix >> marker) >>
+ string " " >> lineContent
+ parseContent = parseFromString $ many1 $ nestedList <|> parseInline
+ parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
+ return . B.plain . mconcat
+ nestedList = list prefix
+ lastNewline = try $ char '\n' <* eof
+ newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
+
+table :: PandocMonad m => TWParser m B.Blocks
+table = try $ do
+ tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ rows <- many1 tableParseRow
+ return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
+ where
+ buildTable caption rows (aligns, heads)
+ = B.table caption aligns heads rows
+ align rows = replicate (columCount rows) (AlignDefault, 0)
+ columns rows = replicate (columCount rows) mempty
+ columCount rows = length $ head rows
+
+tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
+tableParseHeader = try $ do
+ char '|'
+ leftSpaces <- many spaceChar >>= return . length
+ char '*'
+ content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
+ char '*'
+ rightSpaces <- many spaceChar >>= return . length
+ optional tableEndOfRow
+ return (tableAlign leftSpaces rightSpaces, content)
+ where
+ tableAlign left right
+ | left >= 2 && left == right = (AlignCenter, 0)
+ | left > right = (AlignRight, 0)
+ | otherwise = (AlignLeft, 0)
+
+tableParseRow :: PandocMonad m => TWParser m [B.Blocks]
+tableParseRow = many1Till tableParseColumn newline
+
+tableParseColumn :: PandocMonad m => TWParser m B.Blocks
+tableParseColumn = char '|' *> skipSpaces *>
+ tableColumnContent (skipSpaces >> char '|')
+ <* skipSpaces <* optional tableEndOfRow
+
+tableEndOfRow :: PandocMonad m => TWParser m Char
+tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
+
+tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
+tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+ where
+ content = continuation <|> inline
+ continuation = try $ char '\\' >> newline >> return mempty
+
+blockQuote :: PandocMonad m => TWParser m B.Blocks
+blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+
+noautolink :: PandocMonad m => TWParser m B.Blocks
+noautolink = do
+ (_, content) <- htmlElement "noautolink"
+ st <- getState
+ setState $ st{ stateAllowLinks = False }
+ blocks <- try $ parseContent content
+ setState $ st{ stateAllowLinks = True }
+ return $ mconcat blocks
+ where
+ parseContent = parseFromString $ many $ block
+
+para :: PandocMonad m => TWParser m B.Blocks
+para = many1Till inline endOfParaElement >>= return . result . mconcat
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> skip blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => TWParser m B.Inlines
+inline = choice [ whitespace
+ , br
+ , macro
+ , strong
+ , strongHtml
+ , strongAndEmph
+ , emph
+ , emphHtml
+ , boldCode
+ , smart
+ , link
+ , htmlComment
+ , code
+ , codeHtml
+ , nop
+ , autoLink
+ , str
+ , symbol
+ ] <?> "inline"
+
+whitespace :: PandocMonad m => TWParser m B.Inlines
+whitespace = (lb <|> regsp) >>= return
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: PandocMonad m => TWParser m B.Inlines
+br = try $ string "%BR%" >> return B.linebreak
+
+linebreak :: PandocMonad m => TWParser m B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+between :: (Monoid c, PandocMonad m)
+ => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c)
+ -> TWParser m c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Monoid b, PandocMonad m)
+ => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
+enclosed sep p = between sep (try $ sep <* endMarker) p
+ where
+ endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endSpace = (spaceChar <|> newline) >> return B.space
+
+macro :: PandocMonad m => TWParser m B.Inlines
+macro = macroWithParameters <|> withoutParameters
+ where
+ withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ emptySpan name = buildSpan name [] mempty
+
+macroWithParameters :: PandocMonad m => TWParser m B.Inlines
+macroWithParameters = try $ do
+ char '%'
+ name <- macroName
+ (content, kvs) <- attributes
+ char '%'
+ return $ buildSpan name kvs $ B.str content
+
+buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan className kvs = B.spanWith attrs
+ where
+ attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
+ additionalClasses = maybe [] words $ lookup "class" kvs
+ kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
+
+macroName :: PandocMonad m => TWParser m String
+macroName = do
+ first <- letter
+ rest <- many $ alphaNum <|> char '_'
+ return (first:rest)
+
+attributes :: PandocMonad m => TWParser m (String, [(String, String)])
+attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
+ return . foldr (either mkContent mkKvs) ([], [])
+ where
+ spnl = skipMany (spaceChar <|> newline)
+ mkContent c ([], kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkKvs kv (cont, rest) = (cont, (kv : rest))
+
+attribute :: PandocMonad m => TWParser m (Either String (String, String))
+attribute = withKey <|> withoutKey
+ where
+ withKey = try $ do
+ key <- macroName
+ char '='
+ parseValue False >>= return . (curry Right key)
+ withoutKey = try $ parseValue True >>= return . Left
+ parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withoutQuotes allowSpaces
+ | allowSpaces == True = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
+
+nestedInlines :: (Show a, PandocMonad m)
+ => TWParser m a -> TWParser m B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* (notFollowedBy end)
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+strong :: PandocMonad m => TWParser m B.Inlines
+strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+
+strongHtml :: PandocMonad m => TWParser m B.Inlines
+strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
+ >>= return . B.strong . mconcat
+
+strongAndEmph :: PandocMonad m => TWParser m B.Inlines
+strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+
+emph :: PandocMonad m => TWParser m B.Inlines
+emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+
+emphHtml :: PandocMonad m => TWParser m B.Inlines
+emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
+ >>= return . B.emph . mconcat
+
+nestedString :: (Show a, PandocMonad m)
+ => TWParser m a -> TWParser m String
+nestedString end = innerSpace <|> (count 1 nonspaceChar)
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+boldCode :: PandocMonad m => TWParser m B.Inlines
+boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+
+htmlComment :: PandocMonad m => TWParser m B.Inlines
+htmlComment = htmlTag isCommentTag >> return mempty
+
+code :: PandocMonad m => TWParser m B.Inlines
+code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+
+codeHtml :: PandocMonad m => TWParser m B.Inlines
+codeHtml = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ B.codeWith attrs $ fromEntities content
+
+autoLink :: PandocMonad m => TWParser m B.Inlines
+autoLink = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ (text, url) <- parseLink
+ guard $ checkLink (head $ reverse url)
+ return $ makeLink (text, url)
+ where
+ parseLink = notFollowedBy nop >> (uri <|> emailAddress)
+ makeLink (text, url) = B.link url "" $ B.str text
+ checkLink c
+ | c == '/' = True
+ | otherwise = isAlphaNum c
+
+str :: PandocMonad m => TWParser m B.Inlines
+str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+
+nop :: PandocMonad m => TWParser m B.Inlines
+nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+ where
+ exclamation = char '!'
+ nopTag = stringAnyCase "<nop>"
+ followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+
+symbol :: PandocMonad m => TWParser m B.Inlines
+symbol = count 1 nonspaceChar >>= return . B.str
+
+smart :: PandocMonad m => TWParser m B.Inlines
+smart = do
+ guardEnabled Ext_smart
+ doubleQuoted <|> singleQuoted <|>
+ choice [ apostrophe
+ , dash
+ , ellipses
+ ]
+
+singleQuoted :: PandocMonad m => TWParser m B.Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ many1Till inline singleQuoteEnd >>=
+ (return . B.singleQuoted . B.trimInlines . mconcat)
+
+doubleQuoted :: PandocMonad m => TWParser m B.Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ return (B.doubleQuoted $ B.trimInlines contents))
+ <|> (return $ (B.str "\8220") B.<> contents)
+
+link :: PandocMonad m => TWParser m B.Inlines
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ B.link url title content
+
+linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
+linkText = do
+ string "[["
+ url <- many1Till anyChar (char ']')
+ content <- option [B.str url] linkContent
+ char ']'
+ return (url, "", mconcat content)
+ where
+ linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ parseLinkContent = parseFromString $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
new file mode 100644
index 000000000..6594b9ab8
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -0,0 +1,729 @@
+{-
+Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+ and John MacFarlane
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Textile
+ Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Paul Rivier <paul*rivier#demotera*com>
+ Stability : alpha
+ Portability : portable
+
+Conversion from Textile to 'Pandoc' document, based on the spec
+available at http://redcloth.org/textile.
+
+Implemented and parsed:
+ - Paragraphs
+ - Code blocks
+ - Lists
+ - blockquote
+ - Inlines : strong, emph, cite, code, deleted, superscript,
+ subscript, links
+ - footnotes
+ - HTML-specific and CSS-specific attributes on headers
+
+Left to be implemented:
+ - dimension sign
+ - all caps
+ - continued blocks (ex bq..)
+
+TODO : refactor common patterns across readers :
+ - more ...
+
+-}
+
+
+module Text.Pandoc.Readers.Textile ( readTextile) where
+import Text.Pandoc.CSS
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Logging
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
+import Text.Pandoc.Shared (trim)
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
+import Text.HTML.TagSoup (fromAttrib, Tag(..))
+import Text.HTML.TagSoup.Match
+import Data.List ( intercalate, transpose, intersperse )
+import Data.Char ( digitToInt, isUpper )
+import Control.Monad ( guard, liftM )
+import Data.Monoid ((<>))
+import Text.Pandoc.Class (PandocMonad, report)
+import Control.Monad.Except (throwError)
+
+-- | Parse a Textile text and return a Pandoc document.
+readTextile :: PandocMonad m
+ => ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> m Pandoc
+readTextile opts s = do
+ parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n")
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+
+-- | Generate a Pandoc ADT from a textile document
+parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
+parseTextile = do
+ many blankline
+ startPos <- getPosition
+ -- go through once just to get list of reference keys and notes
+ -- docMinusKeys is the raw document with blanks where the keys/notes were...
+ let firstPassParser = noteBlock <|> lineClump
+ manyTill firstPassParser eof >>= setInput . concat
+ setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ -- now parse it for real...
+ blocks <- parseBlocks
+ return $ Pandoc nullMeta (B.toList blocks) -- FIXME
+
+noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
+noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
+
+noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ ref <- noteMarker
+ optional blankline
+ contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
+ endPos <- getPosition
+ let newnote = (ref, contents ++ "\n")
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+-- | Parse document blocks
+parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+-- | Block parsers list tried in definition order
+blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
+blockParsers = [ codeBlock
+ , header
+ , blockQuote
+ , hrule
+ , commentBlock
+ , anyList
+ , rawHtmlBlock
+ , rawLaTeXBlock'
+ , table
+ , maybeExplicitBlock "p" para
+ , mempty <$ blanklines
+ ]
+
+-- | Any block in the order of definition of blockParsers
+block :: PandocMonad m => ParserT [Char] ParserState m Blocks
+block = do
+ res <- choice blockParsers <?> "block"
+ pos <- getPosition
+ report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ return res
+
+commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+commentBlock = try $ do
+ string "###."
+ manyTill anyLine blanklines
+ return mempty
+
+codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlock = codeBlockBc <|> codeBlockPre
+
+codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockBc = try $ do
+ string "bc."
+ extended <- option False (True <$ char '.')
+ char ' '
+ let starts = ["p", "table", "bq", "bc", "h1", "h2", "h3",
+ "h4", "h5", "h6", "pre", "###", "notextile"]
+ let ender = choice $ map explicitBlockStart starts
+ contents <- if extended
+ then do
+ f <- anyLine
+ rest <- many (notFollowedBy ender *> anyLine)
+ return (f:rest)
+ else manyTill anyLine blanklines
+ return $ B.codeBlock (trimTrailingNewlines (unlines contents))
+
+trimTrailingNewlines :: String -> String
+trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
+
+-- | Code Blocks in Textile are between <pre> and </pre>
+codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockPre = try $ do
+ (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
+ result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
+ optional blanklines
+ -- drop leading newline if any
+ let result'' = case result' of
+ '\n':xs -> xs
+ _ -> result'
+ -- drop trailing newline if any
+ let result''' = case reverse result'' of
+ '\n':_ -> init result''
+ _ -> result''
+ let classes = words $ fromAttrib "class" t
+ let ident = fromAttrib "id" t
+ let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ B.codeBlockWith (ident,classes,kvs) result'''
+
+-- | Header of the form "hN. content" with N in 1..6
+header :: PandocMonad m => ParserT [Char] ParserState m Blocks
+header = try $ do
+ char 'h'
+ level <- digitToInt <$> oneOf "123456"
+ attr <- attributes
+ char '.'
+ lookAhead whitespace
+ name <- trimInlines . mconcat <$> many inline
+ attr' <- registerHeader attr name
+ return $ B.headerWith attr' level name
+
+-- | Blockquote of the form "bq. content"
+blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
+blockQuote = try $ do
+ string "bq" >> attributes >> char '.' >> whitespace
+ B.blockQuote <$> para
+
+-- Horizontal rule
+
+hrule :: PandocMonad m => ParserT [Char] st m Blocks
+hrule = try $ do
+ skipSpaces
+ start <- oneOf "-*"
+ count 2 (skipSpaces >> char start)
+ skipMany (spaceChar <|> char start)
+ newline
+ optional blanklines
+ return B.horizontalRule
+
+-- Lists handling
+
+-- | Can be a bullet list or an ordered list. This implementation is
+-- strict in the nesting, sublist must start at exactly "parent depth
+-- plus one"
+anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+anyList = try $ anyListAtDepth 1 <* blanklines
+
+-- | This allow one type of list to be nested into an other type,
+-- provided correct nesting
+anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+anyListAtDepth depth = choice [ bulletListAtDepth depth,
+ orderedListAtDepth depth,
+ definitionList ]
+
+-- | Bullet List of given depth, depth being the number of leading '*'
+bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
+
+-- | Bullet List Item of given depth, depth being the number of
+-- leading '*'
+bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListItemAtDepth = genericListItemAtDepth '*'
+
+-- | Ordered List of given depth, depth being the number of
+-- leading '#'
+orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListAtDepth depth = try $ do
+ items <- many1 (orderedListItemAtDepth depth)
+ return $ B.orderedList items
+
+-- | Ordered List Item of given depth, depth being the number of
+-- leading '#'
+orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListItemAtDepth = genericListItemAtDepth '#'
+
+-- | Common implementation of list items
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
+genericListItemAtDepth c depth = try $ do
+ count depth (char c) >> attributes >> whitespace
+ p <- mconcat <$> many listInline
+ newline
+ sublist <- option mempty (anyListAtDepth (depth + 1))
+ return $ (B.plain p) <> sublist
+
+-- | A definition list is a set of consecutive definition items
+definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+definitionList = try $ B.definitionList <$> many1 definitionListItem
+
+-- | List start character.
+listStart :: PandocMonad m => ParserT [Char] ParserState m ()
+listStart = genericListStart '*'
+ <|> () <$ genericListStart '#'
+ <|> () <$ definitionListStart
+
+genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
+genericListStart c = () <$ try (many1 (char c) >> whitespace)
+
+basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
+basicDLStart = do
+ char '-'
+ whitespace
+ notFollowedBy newline
+
+definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
+definitionListStart = try $ do
+ basicDLStart
+ trimInlines . mconcat <$>
+ many1Till inline
+ ( try (newline *> lookAhead basicDLStart)
+ <|> try (lookAhead (() <$ string ":="))
+ )
+
+listInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+listInline = try (notFollowedBy newline >> inline)
+ <|> try (endline <* notFollowedBy listStart)
+
+-- | A definition list item in textile begins with '- ', followed by
+-- the term defined, then spaces and ":=". The definition follows, on
+-- the same single line, or spaned on multiple line, after a line
+-- break.
+definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
+definitionListItem = try $ do
+ term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
+ def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
+ return (term, def')
+ where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ inlineDef = liftM (\d -> [B.plain d])
+ $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
+ multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ multilineDef = try $ do
+ optional whitespace >> newline
+ s <- many1Till anyChar (try (string "=:" >> newline))
+ -- this ++ "\n\n" does not look very good
+ ds <- parseFromString parseBlocks (s ++ "\n\n")
+ return [ds]
+
+-- raw content
+
+-- | A raw Html Block, optionally followed by blanklines
+rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawHtmlBlock = try $ do
+ skipMany spaceChar
+ (_,b) <- htmlTag isBlockTag
+ optional blanklines
+ return $ B.rawBlock "html" b
+
+-- | Raw block of LaTeX content
+rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawLaTeXBlock' = do
+ guardEnabled Ext_raw_tex
+ B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
+
+
+-- | In textile, paragraphs are separated by blank lines.
+para :: PandocMonad m => ParserT [Char] ParserState m Blocks
+para = B.para . trimInlines . mconcat <$> many1 inline
+
+-- Tables
+
+toAlignment :: Char -> Alignment
+toAlignment '<' = AlignLeft
+toAlignment '>' = AlignRight
+toAlignment '=' = AlignCenter
+toAlignment _ = AlignDefault
+
+cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
+cellAttributes = try $ do
+ isHeader <- option False (True <$ char '_')
+ -- we just ignore colspan and rowspan markers:
+ optional $ try $ oneOf "/\\" >> many1 digit
+ -- we pay attention to alignments:
+ alignment <- option AlignDefault $ toAlignment <$> oneOf "<>="
+ -- ignore other attributes for now:
+ _ <- attributes
+ char '.'
+ return (isHeader, alignment)
+
+-- | A table cell spans until a pipe |
+tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
+tableCell = try $ do
+ char '|'
+ (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes
+ notFollowedBy blankline
+ raw <- trim <$>
+ many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
+ content <- mconcat <$> parseFromString (many inline) raw
+ return ((isHeader, alignment), B.plain content)
+
+-- | A table row is made of many table cells
+tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
+tableRow = try $ do
+ -- skip optional row attributes
+ optional $ try $ do
+ _ <- attributes
+ char '.'
+ many1 spaceChar
+ many1 tableCell <* char '|' <* blankline
+
+-- | A table with an optional header.
+table :: PandocMonad m => ParserT [Char] ParserState m Blocks
+table = try $ do
+ -- ignore table attributes
+ caption <- option mempty $ try $ do
+ string "table"
+ _ <- attributes
+ char '.'
+ rawcapt <- trim <$> anyLine
+ parseFromString (mconcat <$> many inline) rawcapt
+ rawrows <- many1 $ (skipMany ignorableRow) >> tableRow
+ skipMany ignorableRow
+ blanklines
+ let (headers, rows) = case rawrows of
+ (toprow:rest) | any (fst . fst) toprow ->
+ (toprow, rest)
+ _ -> (mempty, rawrows)
+ let nbOfCols = max (length headers) (length $ head rows)
+ let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+ return $ B.table caption
+ (zip aligns (replicate nbOfCols 0.0))
+ (map snd headers)
+ (map (map snd) rows)
+
+-- | Ignore markers for cols, thead, tfoot.
+ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
+ignorableRow = try $ do
+ char '|'
+ oneOf ":^-~"
+ _ <- attributes
+ char '.'
+ _ <- anyLine
+ return ()
+
+explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
+explicitBlockStart name = try $ do
+ string name
+ attributes
+ char '.'
+ optional whitespace
+ optional endline
+
+-- | Blocks like 'p' and 'table' do not need explicit block tag.
+-- However, they can be used to set HTML/CSS attributes when needed.
+maybeExplicitBlock :: PandocMonad m
+ => String -- ^ block tag name
+ -> ParserT [Char] ParserState m Blocks -- ^ implicit block
+ -> ParserT [Char] ParserState m Blocks
+maybeExplicitBlock name blk = try $ do
+ optional $ explicitBlockStart name
+ blk
+
+
+
+----------
+-- Inlines
+----------
+
+
+-- | Any inline element
+inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inline = do
+ choice inlineParsers <?> "inline"
+
+-- | Inline parsers tried in order
+inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
+inlineParsers = [ str
+ , whitespace
+ , endline
+ , code
+ , escapedInline
+ , inlineMarkup
+ , groupedInlineMarkup
+ , rawHtmlInline
+ , rawLaTeXInline'
+ , note
+ , link
+ , image
+ , mark
+ , (B.str . (:[])) <$> characterReference
+ , smartPunctuation inline
+ , symbol
+ ]
+
+-- | Inline markups
+inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
+ , simpleInline (string "**") B.strong
+ , simpleInline (string "__") B.emph
+ , simpleInline (char '*') B.strong
+ , simpleInline (char '_') B.emph
+ , simpleInline (char '+') B.emph -- approximates underline
+ , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
+ , simpleInline (char '^') B.superscript
+ , simpleInline (char '~') B.subscript
+ , simpleInline (char '%') id
+ ]
+
+-- | Trademark, registered, copyright
+mark :: PandocMonad m => ParserT [Char] st m Inlines
+mark = try $ char '(' >> (try tm <|> try reg <|> copy)
+
+reg :: PandocMonad m => ParserT [Char] st m Inlines
+reg = do
+ oneOf "Rr"
+ char ')'
+ return $ B.str "\174"
+
+tm :: PandocMonad m => ParserT [Char] st m Inlines
+tm = do
+ oneOf "Tt"
+ oneOf "Mm"
+ char ')'
+ return $ B.str "\8482"
+
+copy :: PandocMonad m => ParserT [Char] st m Inlines
+copy = do
+ oneOf "Cc"
+ char ')'
+ return $ B.str "\169"
+
+note :: PandocMonad m => ParserT [Char] ParserState m Inlines
+note = try $ do
+ ref <- (char '[' *> many1 digit <* char ']')
+ notes <- stateNotes <$> getState
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just raw -> B.note <$> parseFromString parseBlocks raw
+
+-- | Special chars
+markupChars :: [Char]
+markupChars = "\\*#_@~-+^|%=[]&"
+
+-- | Break strings on following chars. Space tab and newline break for
+-- inlines breaking. Open paren breaks for mark. Quote, dash and dot
+-- break for smart punctuation. Punctuation breaks for regular
+-- punctuation. Double quote breaks for named links. > and < break
+-- for inline html.
+stringBreakers :: [Char]
+stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
+
+wordBoundaries :: [Char]
+wordBoundaries = markupChars ++ stringBreakers
+
+-- | Parse a hyphened sequence of words
+hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
+hyphenedWords = do
+ x <- wordChunk
+ xs <- many (try $ char '-' >> wordChunk)
+ return $ intercalate "-" (x:xs)
+
+wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
+wordChunk = try $ do
+ hd <- noneOf wordBoundaries
+ tl <- many ( (noneOf wordBoundaries) <|>
+ try (notFollowedBy' note *> oneOf markupChars
+ <* lookAhead (noneOf wordBoundaries) ) )
+ return $ hd:tl
+
+-- | Any string
+str :: PandocMonad m => ParserT [Char] ParserState m Inlines
+str = do
+ baseStr <- hyphenedWords
+ -- RedCloth compliance : if parsed word is uppercase and immediatly
+ -- followed by parens, parens content is unconditionally word acronym
+ fullStr <- option baseStr $ try $ do
+ guard $ all isUpper baseStr
+ acro <- enclosed (char '(') (char ')') anyChar'
+ return $ concat [baseStr, " (", acro, ")"]
+ updateLastStrPos
+ return $ B.str fullStr
+
+-- | Some number of space chars
+whitespace :: PandocMonad m => ParserT [Char] st m Inlines
+whitespace = many1 spaceChar >> return B.space <?> "whitespace"
+
+-- | In Textile, an isolated endline character is a line break
+endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy listStart
+ notFollowedBy rawHtmlBlock
+ return B.linebreak
+
+rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
+
+-- | Raw LaTeX Inline
+rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawLaTeXInline' = try $ do
+ guardEnabled Ext_raw_tex
+ B.singleton <$> rawLaTeXInline
+
+-- | Textile standard link syntax is "label":target. But we
+-- can also have ["label":target].
+link :: PandocMonad m => ParserT [Char] ParserState m Inlines
+link = try $ do
+ bracketed <- (True <$ char '[') <|> return False
+ char '"' *> notFollowedBy (oneOf " \t\n\r")
+ attr <- attributes
+ name <- trimInlines . mconcat <$>
+ withQuoteContext InDoubleQuote (many1Till inline (char '"'))
+ char ':'
+ let stop = if bracketed
+ then char ']'
+ else lookAhead $ space <|>
+ try (oneOf "!.,;:" *> (space <|> newline))
+ url <- many1Till nonspaceChar stop
+ let name' = if B.toList name == [Str "$"] then B.str url else name
+ return $ if attr == nullAttr
+ then B.link url "" name'
+ else B.spanWith attr $ B.link url "" name'
+
+-- | image embedding
+image :: PandocMonad m => ParserT [Char] ParserState m Inlines
+image = try $ do
+ char '!' >> notFollowedBy space
+ (ident, cls, kvs) <- attributes
+ let attr = case lookup "style" kvs of
+ Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
+ Nothing -> (ident, cls, kvs)
+ src <- many1 (noneOf " \t\n\r!(")
+ alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')')
+ char '!'
+ return $ B.imageWith attr src alt (B.str alt)
+
+escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+escapedInline = escapedEqs <|> escapedTag
+
+escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
+escapedEqs = B.str <$>
+ (try $ string "==" *> manyTill anyChar' (try $ string "=="))
+
+-- | literal text escaped btw <notextile> tags
+escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
+escapedTag = B.str <$>
+ (try $ string "<notextile>" *>
+ manyTill anyChar' (try $ string "</notextile>"))
+
+-- | Any special symbol defined in wordBoundaries
+symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
+symbol = B.str . singleton <$> (notFollowedBy newline *>
+ notFollowedBy rawHtmlBlock *>
+ oneOf wordBoundaries)
+
+-- | Inline code
+code :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code = code1 <|> code2
+
+-- any character except a newline before a blank line
+anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
+anyChar' =
+ satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
+
+code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code1 = B.code <$> surrounded (char '@') anyChar'
+
+code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code2 = do
+ htmlTag (tagOpen (=="tt") null)
+ B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
+
+-- | Html / CSS attributes
+attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
+attributes = (foldl (flip ($)) ("",[],[])) <$>
+ try (do special <- option id specialAttribute
+ attrs <- many attribute
+ return (special : attrs))
+
+specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+specialAttribute = do
+ alignStr <- ("center" <$ char '=') <|>
+ ("justify" <$ try (string "<>")) <|>
+ ("right" <$ char '>') <|>
+ ("left" <$ char '<')
+ notFollowedBy spaceChar
+ return $ addStyle ("text-align:" ++ alignStr)
+
+attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+attribute = try $
+ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
+
+classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+classIdAttr = try $ do -- (class class #id)
+ char '('
+ ws <- words `fmap` manyTill anyChar' (char ')')
+ case reverse ws of
+ [] -> return $ \(_,_,keyvals) -> ("",[],keyvals)
+ (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
+ (ident',classes',keyvals)
+ classes' -> return $ \(_,_,keyvals) ->
+ ("",classes',keyvals)
+
+styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+styleAttr = do
+ style <- try $ enclosed (char '{') (char '}') anyChar'
+ return $ addStyle style
+
+addStyle :: String -> Attr -> Attr
+addStyle style (id',classes,keyvals) =
+ (id',classes,keyvals')
+ where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
+ style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
+
+langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+langAttr = do
+ lang <- try $ enclosed (char '[') (char ']') alphaNum
+ return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
+
+-- | Parses material surrounded by a parser.
+surrounded :: PandocMonad m
+ => ParserT [Char] st m t -- ^ surrounding parser
+ -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT [Char] st m [a]
+surrounded border =
+ enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
+
+simpleInline :: PandocMonad m
+ => ParserT [Char] ParserState m t -- ^ surrounding parser
+ -> (Inlines -> Inlines) -- ^ Inline constructor
+ -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
+simpleInline border construct = try $ do
+ notAfterString
+ border *> notFollowedBy (oneOf " \t\n\r")
+ attr <- attributes
+ body <- trimInlines . mconcat <$>
+ withQuoteContext InSingleQuote
+ (manyTill (notFollowedBy newline >> inline)
+ (try border <* notFollowedBy alphaNum))
+ return $ construct $
+ if attr == nullAttr
+ then body
+ else B.spanWith attr body
+
+groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+groupedInlineMarkup = try $ do
+ char '['
+ sp1 <- option mempty $ B.space <$ whitespace
+ result <- withQuoteContext InSingleQuote inlineMarkup
+ sp2 <- option mempty $ B.space <$ whitespace
+ char ']'
+ return $ sp1 <> result <> sp2
+
+-- | Create a singleton list
+singleton :: a -> [a]
+singleton x = [x]
+
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
new file mode 100644
index 000000000..9e2b6963d
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -0,0 +1,596 @@
+{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Txt2Tags
+ Copyright : Copyright (C) 2014 Matthew Pickering
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
+
+Conversion of txt2tags formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
+ , getT2TMeta
+ , T2TMeta (..)
+ )
+ where
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
+import Data.Monoid ((<>))
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (escapeURI,compactify, compactifyDL)
+import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
+import Data.Char (toLower)
+import Data.List (transpose, intersperse, intercalate)
+import Data.Maybe (fromMaybe)
+--import Network.URI (isURI) -- Not sure whether to use this function
+import Control.Monad (void, guard, when)
+import Data.Default
+import Control.Monad.Reader (Reader, runReader, asks)
+
+import Data.Time.Format (formatTime)
+import Text.Pandoc.Compat.Time (defaultTimeLocale)
+import Control.Monad.Except (throwError, catchError)
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+
+type T2T = ParserT String ParserState (Reader T2TMeta)
+
+-- | An object for the T2T macros meta information
+-- the contents of each field is simply substituted verbatim into the file
+data T2TMeta = T2TMeta {
+ date :: String -- ^ Current date
+ , mtime :: String -- ^ Last modification time of infile
+ , infile :: FilePath -- ^ Input file
+ , outfile :: FilePath -- ^ Output file
+ } deriving Show
+
+instance Default T2TMeta where
+ def = T2TMeta "" "" "" ""
+
+-- | Get the meta information required by Txt2Tags macros
+getT2TMeta :: PandocMonad m => m T2TMeta
+getT2TMeta = do
+ mbInps <- P.getInputFiles
+ let inps = case mbInps of
+ Just x -> x
+ Nothing -> []
+ mbOutp <- P.getOutputFile
+ let outp = case mbOutp of
+ Just x -> x
+ Nothing -> ""
+ curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
+ let getModTime = fmap (formatTime defaultTimeLocale "%T") .
+ P.getModificationTime
+ curMtime <- case inps of
+ [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+ _ -> catchError
+ (maximum <$> mapM getModTime inps)
+ (const (return ""))
+ return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
+
+-- | Read Txt2Tags from an input string returning a Pandoc document
+readTxt2Tags :: PandocMonad m
+ => ReaderOptions
+ -> String
+ -> m Pandoc
+readTxt2Tags opts s = do
+ meta <- getT2TMeta
+ let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+ case parsed of
+ Right result -> return $ result
+ Left e -> throwError e
+
+-- | Read Txt2Tags (ignoring all macros) from an input string returning
+-- a Pandoc document
+-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+-- readTxt2TagsNoMacros = readTxt2Tags
+
+parseT2T :: T2T Pandoc
+parseT2T = do
+ -- Parse header if standalone flag is set
+ standalone <- getOption readerStandalone
+ when standalone parseHeader
+ body <- mconcat <$> manyTill block eof
+ meta' <- stateMeta <$> getState
+ return $ Pandoc meta' (B.toList body)
+
+parseHeader :: T2T ()
+parseHeader = do
+ () <$ try blankline <|> header
+ meta <- stateMeta <$> getState
+ optional blanklines
+ config <- manyTill setting (notFollowedBy setting)
+ -- TODO: Handle settings better
+ let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) meta config
+ updateState (\s -> s {stateMeta = settings}) <* optional blanklines
+
+header :: T2T ()
+header = titleline >> authorline >> dateline
+
+headerline :: B.ToMetaValue a => String -> T2T a -> T2T ()
+headerline field p = (() <$ try blankline)
+ <|> (p >>= updateState . B.setMeta field)
+
+titleline :: T2T ()
+titleline =
+ headerline "title" (trimInlines . mconcat <$> manyTill inline newline)
+
+authorline :: T2T ()
+authorline =
+ headerline "author" (sepBy author (char ';') <* newline)
+ where
+ author = trimInlines . mconcat <$> many (notFollowedBy (char ';' <|> newline) >> inline)
+
+dateline :: T2T ()
+dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline)
+
+type Keyword = String
+type Value = String
+
+setting :: T2T (Keyword, Value)
+setting = do
+ string "%!"
+ keyword <- ignoreSpacesCap (many1 alphaNum)
+ char ':'
+ value <- ignoreSpacesCap (manyTill anyChar (newline))
+ return (keyword, value)
+
+-- Blocks
+
+parseBlocks :: T2T Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: T2T Blocks
+block = do
+ choice
+ [ mempty <$ blanklines
+ , quote
+ , hrule -- hrule must go above title
+ , title
+ , commentBlock
+ , verbatim
+ , rawBlock
+ , taggedBlock
+ , list
+ , table
+ , para
+ ]
+
+title :: T2T Blocks
+title = try $ balancedTitle '+' <|> balancedTitle '='
+
+balancedTitle :: Char -> T2T Blocks
+balancedTitle c = try $ do
+ spaces
+ level <- length <$> many1 (char c)
+ guard (level <= 5) -- Max header level 5
+ heading <- manyTill (noneOf "\n\r") (count level (char c))
+ label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
+ many spaceChar *> newline
+ let attr = maybe nullAttr (\x -> (x, [], [])) label
+ return $ B.headerWith attr level (trimInlines $ B.text heading)
+
+para :: T2T Blocks
+para = try $ do
+ ils <- parseInlines
+ nl <- option False (True <$ newline)
+ option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
+ where
+ listStart = try bulletListStart <|> orderedListStart
+
+commentBlock :: T2T Blocks
+commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
+
+-- Seperator and Strong line treated the same
+hrule :: T2T Blocks
+hrule = try $ do
+ spaces
+ line <- many1 (oneOf "=-_")
+ guard (length line >= 20)
+ B.horizontalRule <$ blankline
+
+quote :: T2T Blocks
+quote = try $ do
+ lookAhead tab
+ rawQuote <- many1 (tab *> optional spaces *> anyLine)
+ contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ return $ B.blockQuote contents
+
+commentLine :: T2T Inlines
+commentLine = comment
+
+-- List Parsing code from Org Reader
+
+list :: T2T Blocks
+list = choice [bulletList, orderedList, definitionList]
+
+bulletList :: T2T Blocks
+bulletList = B.bulletList . compactify
+ <$> many1 (listItem bulletListStart parseBlocks)
+
+orderedList :: T2T Blocks
+orderedList = B.orderedList . compactify
+ <$> many1 (listItem orderedListStart parseBlocks)
+
+definitionList :: T2T Blocks
+definitionList = try $ do
+ B.definitionList . compactifyDL <$>
+ many1 (listItem definitionListStart definitionListEnd)
+
+definitionListEnd :: T2T (Inlines, [Blocks])
+definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
+
+genericListStart :: T2T Char
+ -> T2T Int
+genericListStart listMarker = try $
+ (2+) <$> (length <$> many spaceChar
+ <* listMarker <* space <* notFollowedBy space)
+
+-- parses bullet list \start and returns its length (excl. following whitespace)
+bulletListStart :: T2T Int
+bulletListStart = genericListStart (char '-')
+
+orderedListStart :: T2T Int
+orderedListStart = genericListStart (char '+' )
+
+definitionListStart :: T2T Int
+definitionListStart = genericListStart (char ':')
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: T2T Int
+ -> T2T a
+ -> T2T a
+listItem start end = try $ do
+ markerLength <- try start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ rest <- concat <$> many (listContinuation markerLength)
+ parseFromString end $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+ -> T2T String
+listContinuation markerLength = try $
+ notFollowedBy' (blankline >> blankline)
+ *> (mappend <$> (concat <$> many1 listLine)
+ <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+anyLineNewline :: T2T String
+anyLineNewline = (++ "\n") <$> anyLine
+
+indentWith :: Int -> T2T String
+indentWith n = count n space
+
+-- Table
+
+table :: T2T Blocks
+table = try $ do
+ tableHeader <- fmap snd <$> option mempty (try headerRow)
+ rows <- many1 (many commentLine *> tableRow)
+ let columns = transpose rows
+ let ncolumns = length columns
+ let aligns = map (foldr1 findAlign) (map (map fst) columns)
+ let rows' = map (map snd) rows
+ let size = maximum (map length rows')
+ let rowsPadded = map (pad size) rows'
+ let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty
+ return $ B.table mempty
+ (zip aligns (replicate ncolumns 0.0))
+ headerPadded rowsPadded
+
+pad :: (Monoid a) => Int -> [a] -> [a]
+pad n xs = xs ++ (replicate (n - length xs) mempty)
+
+
+findAlign :: Alignment -> Alignment -> Alignment
+findAlign x y
+ | x == y = x
+ | otherwise = AlignDefault
+
+headerRow :: T2T [(Alignment, Blocks)]
+headerRow = genericRow (string "||")
+
+tableRow :: T2T [(Alignment, Blocks)]
+tableRow = genericRow (char '|')
+
+genericRow :: T2T a -> T2T [(Alignment, Blocks)]
+genericRow start = try $ do
+ spaces *> start
+ manyTill tableCell newline <?> "genericRow"
+
+
+tableCell :: T2T (Alignment, Blocks)
+tableCell = try $ do
+ leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
+ content <- (manyTill inline (try $ lookAhead (cellEnd)))
+ rightSpaces <- length <$> many space
+ let align =
+ case compare leftSpaces rightSpaces of
+ LT -> AlignLeft
+ EQ -> AlignCenter
+ GT -> AlignRight
+ endOfCell
+ return $ (align, B.plain (B.trimInlines $ mconcat content))
+ where
+ cellEnd = (void newline <|> (many1 space *> endOfCell))
+
+endOfCell :: T2T ()
+endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
+
+-- Raw area
+
+verbatim :: T2T Blocks
+verbatim = genericBlock anyLineNewline B.codeBlock "```"
+
+rawBlock :: T2T Blocks
+rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
+
+taggedBlock :: T2T Blocks
+taggedBlock = do
+ target <- getTarget
+ genericBlock anyLineNewline (B.rawBlock target) "'''"
+
+-- Generic
+
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
+
+blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea p f s = try $ (do
+ string s *> blankline
+ f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
+
+blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+
+-- Can be in either block or inline position
+comment :: Monoid a => T2T a
+comment = try $ do
+ atStart
+ notFollowedBy macro
+ mempty <$ (char '%' *> anyLine)
+
+-- Inline
+
+parseInlines :: T2T Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
+
+inline :: T2T Inlines
+inline = do
+ choice
+ [ endline
+ , macro
+ , commentLine
+ , whitespace
+ , url
+ , link
+ , image
+ , bold
+ , underline
+ , code
+ , raw
+ , tagged
+ , strike
+ , italic
+ , code
+ , str
+ , symbol
+ ]
+
+bold :: T2T Inlines
+bold = inlineMarkup inline B.strong '*' (B.str)
+
+underline :: T2T Inlines
+underline = inlineMarkup inline B.emph '_' (B.str)
+
+strike :: T2T Inlines
+strike = inlineMarkup inline B.strikeout '-' (B.str)
+
+italic :: T2T Inlines
+italic = inlineMarkup inline B.emph '/' (B.str)
+
+code :: T2T Inlines
+code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+
+raw :: T2T Inlines
+raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+
+tagged :: T2T Inlines
+tagged = do
+ target <- getTarget
+ inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+
+-- Parser for markup indicated by a double character.
+-- Inline markup is greedy and glued
+-- Greedy meaning ***a*** = Bold [Str "*a*"]
+-- Glued meaning that markup must be tight to content
+-- Markup can't pass newlines
+inlineMarkup :: Monoid a
+ => (T2T a) -- Content parser
+ -> (a -> Inlines) -- Constructor
+ -> Char -- Fence
+ -> (String -> a) -- Special Case to handle ******
+ -> T2T Inlines
+inlineMarkup p f c special = try $ do
+ start <- many1 (char c)
+ let l = length start
+ guard (l >= 2)
+ when (l == 2) (void $ notFollowedBy space)
+ -- We must make sure that there is no space before the start of the
+ -- closing tags
+ body <- optionMaybe (try $ manyTill (noneOf "\n\r") $
+ (try $ lookAhead (noneOf " " >> string [c,c] )))
+ case body of
+ Just middle -> do
+ lastChar <- anyChar
+ end <- many1 (char c)
+ let parser inp = parseFromString (mconcat <$> many p) inp
+ let start' = case drop 2 start of
+ "" -> mempty
+ xs -> special xs
+ body' <- parser (middle ++ [lastChar])
+ let end' = case drop 2 end of
+ "" -> mempty
+ xs -> special xs
+ return $ f (start' <> body' <> end')
+ Nothing -> do -- Either bad or case such as *****
+ guard (l >= 5)
+ let body' = (replicate (l - 4) c)
+ return $ f (special body')
+
+link :: T2T Inlines
+link = try imageLink <|> titleLink
+
+-- Link with title
+titleLink :: T2T Inlines
+titleLink = try $ do
+ char '['
+ notFollowedBy space
+ tokens <- sepBy1 (many $ noneOf " ]") space
+ guard (length tokens >= 2)
+ char ']'
+ let link' = last tokens
+ guard (length link' > 0)
+ let tit = concat (intersperse " " (init tokens))
+ return $ B.link link' "" (B.text tit)
+
+-- Link with image
+imageLink :: T2T Inlines
+imageLink = try $ do
+ char '['
+ body <- image
+ many1 space
+ l <- manyTill (noneOf "\n\r ") (char ']')
+ return (B.link l "" body)
+
+macro :: T2T Inlines
+macro = try $ do
+ name <- string "%%" *> oneOfStringsCI (map fst commands)
+ optional (try $ enclosed (char '(') (char ')') anyChar)
+ lookAhead (spaceChar <|> oneOf specialChars <|> newline)
+ maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
+ where
+ commands = [ ("date", date), ("mtime", mtime)
+ , ("infile", infile), ("outfile", outfile)]
+
+-- raw URLs in text are automatically linked
+url :: T2T Inlines
+url = try $ do
+ (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
+ return $ B.link rawUrl "" (B.str escapedUrl)
+
+uri :: T2T (String, String)
+uri = try $ do
+ address <- t2tURI
+ return (address, escapeURI address)
+
+-- The definition of a URI in the T2T source differs from the
+-- actual definition. This is a transcription of the definition in
+-- the source of v2.6
+--isT2TURI :: String -> Bool
+--isT2TURI (parse t2tURI "" -> Right _) = True
+--isT2TURI _ = False
+
+t2tURI :: T2T String
+t2tURI = do
+ start <- try ((++) <$> proto <*> urlLogin) <|> guess
+ domain <- many1 chars
+ sep <- many (char '/')
+ form' <- option mempty ((:) <$> char '?' <*> many1 form)
+ anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
+ return (start ++ domain ++ sep ++ form' ++ anchor')
+ where
+ protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
+ proto = (++) <$> oneOfStrings protos <*> string "://"
+ guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
+ <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+ login = alphaNum <|> oneOf "_.-"
+ pass = many (noneOf " @")
+ chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
+ anchor = alphaNum <|> oneOf "%._0"
+ form = chars <|> oneOf ";*"
+ urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+
+
+image :: T2T Inlines
+image = try $ do
+ -- List taken from txt2tags source
+ let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
+ char '['
+ path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
+ ext <- oneOfStrings extensions
+ char ']'
+ return $ B.image (path ++ ext) "" mempty
+
+-- Characters used in markup
+specialChars :: String
+specialChars = "%*-_/|:+;"
+
+tab :: T2T Char
+tab = char '\t'
+
+space :: T2T Char
+space = char ' '
+
+spaces :: T2T String
+spaces = many space
+
+endline :: T2T Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy hrule
+ notFollowedBy title
+ notFollowedBy verbatim
+ notFollowedBy rawBlock
+ notFollowedBy taggedBlock
+ notFollowedBy quote
+ notFollowedBy list
+ notFollowedBy table
+ return $ B.softbreak
+
+str :: T2T Inlines
+str = try $ do
+ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+
+whitespace :: T2T Inlines
+whitespace = try $ B.space <$ spaceChar
+
+symbol :: T2T Inlines
+symbol = B.str . (:[]) <$> oneOf specialChars
+
+-- Utility
+
+getTarget :: T2T String
+getTarget = do
+ mv <- lookupMeta "target" . stateMeta <$> getState
+ let MetaString target = fromMaybe (MetaString "html") mv
+ return target
+
+atStart :: T2T ()
+atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+
+ignoreSpacesCap :: T2T String -> T2T String
+ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
new file mode 100644
index 000000000..85b298a85
--- /dev/null
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.SelfContained
+ Copyright : Copyright (C) 2011-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for converting an HTML file into one that can be viewed
+offline, by incorporating linked images, CSS, and scripts into
+the HTML using data URIs.
+-}
+module Text.Pandoc.SelfContained ( makeSelfContained ) where
+import Text.HTML.TagSoup
+import Network.URI (isURI, escapeURIString, URI(..), parseURI)
+import Data.ByteString.Base64
+import qualified Data.ByteString.Char8 as B
+import Data.ByteString (ByteString)
+import System.FilePath (takeExtension, takeDirectory, (</>))
+import Data.Char (toLower, isAscii, isAlphaNum)
+import Codec.Compression.GZip as Gzip
+import qualified Data.ByteString.Lazy as L
+import Control.Monad.Trans (MonadIO(..))
+import Text.Pandoc.Shared (renderTags', err, warn, trim)
+import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.MIME (MimeType)
+import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Options (WriterOptions(..))
+import Data.List (isPrefixOf)
+import Control.Applicative ((<|>))
+import Text.Parsec (runParserT, ParsecT)
+import qualified Text.Parsec as P
+import Control.Monad.Trans (lift)
+import Text.Pandoc.Class (fetchItem, runIO, setMediaBag)
+
+isOk :: Char -> Bool
+isOk c = isAscii c && isAlphaNum c
+
+makeDataURI :: String -> ByteString -> String
+makeDataURI mime raw =
+ if textual
+ then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
+ else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
+ where textual = "text/" `Data.List.isPrefixOf` mime
+ mime' = if textual && ';' `notElem` mime
+ then mime ++ ";charset=utf-8"
+ else mime -- mime type already has charset
+
+convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
+convertTag media sourceURL t@(TagOpen tagname as)
+ | tagname `elem`
+ ["img", "embed", "video", "input", "audio", "source", "track"] = do
+ as' <- mapM processAttribute as
+ return $ TagOpen tagname as'
+ where processAttribute (x,y) =
+ if x == "src" || x == "href" || x == "poster"
+ then do
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) y
+ return (x, enc)
+ else return (x,y)
+convertTag media sourceURL t@(TagOpen "script" as) =
+ case fromAttrib "src" t of
+ [] -> return t
+ src -> do
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) src
+ return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag media sourceURL t@(TagOpen "link" as) =
+ case fromAttrib "href" t of
+ [] -> return t
+ src -> do
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) src
+ return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
+convertTag _ _ t = return t
+
+cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
+ -> IO ByteString
+cssURLs media sourceURL d orig = do
+ res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
+ case res of
+ Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
+ Right bs -> return bs
+
+parseCSSUrls :: MediaBag -> Maybe String -> FilePath
+ -> ParsecT ByteString () IO ByteString
+parseCSSUrls media sourceURL d = B.concat <$> P.many
+ (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
+
+-- Note: some whitespace in CSS is significant, so we can't collapse it!
+pCSSWhite :: ParsecT ByteString () IO ByteString
+pCSSWhite = B.singleton <$> P.space <* P.spaces
+
+pCSSComment :: ParsecT ByteString () IO ByteString
+pCSSComment = P.try $ do
+ P.string "/*"
+ P.manyTill P.anyChar (P.try (P.string "*/"))
+ return B.empty
+
+pCSSOther :: ParsecT ByteString () IO ByteString
+pCSSOther = do
+ (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
+ (B.singleton <$> P.char 'u') <|>
+ (B.singleton <$> P.char '/')
+
+pCSSUrl :: MediaBag -> Maybe String -> FilePath
+ -> ParsecT ByteString () IO ByteString
+pCSSUrl media sourceURL d = P.try $ do
+ P.string "url("
+ P.spaces
+ quote <- P.option Nothing (Just <$> P.oneOf "\"'")
+ url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote)
+ P.spaces
+ P.char ')'
+ let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
+ maybe "" (:[]) quote ++ ")")
+ case trim url of
+ '#':_ -> return fallback
+ 'd':'a':'t':'a':':':_ -> return fallback
+ u -> do let url' = if isURI u then u else d </> u
+ enc <- lift $ getDataURI media sourceURL "" url'
+ return (B.pack $ "url(" ++ enc ++ ")")
+
+
+getDataURI :: MediaBag -> Maybe String -> MimeType -> String
+ -> IO String
+getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri
+getDataURI media sourceURL mimetype src = do
+ let ext = map toLower $ takeExtension src
+ fetchResult <- runIO $ do setMediaBag media
+ fetchItem sourceURL src
+ (raw, respMime) <- case fetchResult of
+ Left msg -> err 67 $ "Could not fetch " ++ src ++
+ "\n" ++ show msg
+ Right x -> return x
+ let raw' = if ext == ".gz"
+ then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
+ $ [raw]
+ else raw
+ let mime = case (mimetype, respMime) of
+ ("",Nothing) -> error
+ $ "Could not determine mime type for `" ++ src ++ "'"
+ (x, Nothing) -> x
+ (_, Just x ) -> x
+ let cssSourceURL = case parseURI src of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
+ result <- if mime == "text/css"
+ then cssURLs media cssSourceURL (takeDirectory src) raw'
+ else return raw'
+ return $ makeDataURI mime result
+
+-- | Convert HTML into self-contained HTML, incorporating images,
+-- scripts, and CSS using data: URIs.
+makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String
+makeSelfContained opts mediabag inp = liftIO $ do
+ let tags = parseTags inp
+ out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags
+ return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..268a5052e
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,883 @@
+{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
+ FlexibleContexts, ScopedTypeVariables, PatternGuards,
+ ViewPatterns #-}
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Shared
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
+ splitByIndices,
+ splitStringByIndices,
+ substitute,
+ ordNub,
+ -- * Text processing
+ backslashEscapes,
+ escapeStringUsing,
+ stripTrailingNewlines,
+ trim,
+ triml,
+ trimr,
+ stripFirstAndLast,
+ camelCaseToHyphenated,
+ toRomanNumeral,
+ escapeURI,
+ tabFilter,
+ -- * Date/time
+ normalizeDate,
+ -- * Pandoc block and inline list processing
+ orderedListMarkers,
+ normalizeSpaces,
+ extractSpaces,
+ removeFormatting,
+ deNote,
+ stringify,
+ capitalize,
+ compactify,
+ compactifyDL,
+ linesToPara,
+ Element (..),
+ hierarchicalize,
+ uniqueIdent,
+ inlineListToIdentifier,
+ isHeaderBlock,
+ headerShift,
+ isTightList,
+ addMetaField,
+ makeMeta,
+ -- * TagSoup HTML handling
+ renderTags',
+ -- * File handling
+ inDirectory,
+ getDefaultReferenceDocx,
+ getDefaultReferenceODT,
+ readDataFile,
+ readDataFileUTF8,
+ openURL,
+ collapseFilePath,
+ filteredFilesFromArchive,
+ -- * Error handling
+ err,
+ warn,
+ mapLeft,
+ -- * for squashing blocks
+ blocksToInlines,
+ -- * Safe read
+ safeRead,
+ -- * Temp directory
+ withTempDir,
+ -- * Version
+ pandocVersion
+ ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.UTF8 as UTF8
+import System.Exit (exitWith, ExitCode(..))
+import Data.Char ( toLower, isLower, isUpper, isAlpha,
+ isLetter, isDigit, isSpace )
+import Data.List ( find, stripPrefix, intercalate )
+import Data.Maybe (mapMaybe)
+import Data.Version ( showVersion )
+import qualified Data.Map as M
+import Network.URI ( escapeURIString, unEscapeString )
+import qualified Data.Set as Set
+import System.Directory
+import System.FilePath (splitDirectories, isPathSeparator)
+import qualified System.FilePath.Posix as Posix
+import Text.Pandoc.MIME (MimeType)
+import System.FilePath ( (</>) )
+import Data.Generics (Typeable, Data)
+import qualified Control.Monad.State as S
+import Control.Monad.Trans (MonadIO (..))
+import qualified Control.Exception as E
+import Control.Monad (msum, unless, MonadPlus(..))
+import Text.Pandoc.Pretty (charWidth)
+import Text.Pandoc.Compat.Time
+import Data.Time.Clock.POSIX
+import System.IO (stderr)
+import System.IO.Temp
+import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
+ renderOptions)
+import Data.Monoid ((<>))
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as B8
+import Data.ByteString.Base64 (decodeLenient)
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
+import Data.ByteString.Lazy (toChunks, fromChunks)
+import qualified Data.ByteString.Lazy as BL
+import Paths_pandoc (version)
+
+import Codec.Archive.Zip
+
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#else
+import Paths_pandoc (getDataFileName)
+#endif
+#ifdef HTTP_CLIENT
+import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
+ Request(port,host,requestHeaders))
+import Network.HTTP.Client (parseRequest)
+import Network.HTTP.Client (newManager)
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import System.Environment (getEnv)
+import Network.HTTP.Types.Header ( hContentType, hUserAgent)
+import Network (withSocketsDo)
+#else
+import Network.URI (parseURI)
+import Network.HTTP (findHeader, rspBody,
+ RequestMethod(..), HeaderName(..), mkRequest)
+import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
+#endif
+
+-- | Version number of pandoc library.
+pandocVersion :: String
+pandocVersion = showVersion version
+
+--
+-- List processing
+--
+
+-- | Split list by groups of one or more sep.
+splitBy :: (a -> Bool) -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy isSep lst =
+ let (first, rest) = break isSep lst
+ rest' = dropWhile isSep rest
+ in first:(splitBy isSep rest')
+
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
+ where (first, rest) = splitAt x lst
+
+-- | Split string into chunks divided at specified indices.
+splitStringByIndices :: [Int] -> [Char] -> [[Char]]
+splitStringByIndices [] lst = [lst]
+splitStringByIndices (x:xs) lst =
+ let (first, rest) = splitAt' x lst in
+ first : (splitStringByIndices (map (\y -> y - x) xs) rest)
+
+splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' _ [] = ([],[])
+splitAt' n xs | n <= 0 = ([],xs)
+splitAt' n (x:xs) = (x:ys,zs)
+ where (ys,zs) = splitAt' (n - charWidth x) xs
+
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ xs = xs
+substitute target replacement lst@(x:xs) =
+ case stripPrefix target lst of
+ Just lst' -> replacement ++ substitute target replacement lst'
+ Nothing -> x : substitute target replacement xs
+
+ordNub :: (Ord a) => [a] -> [a]
+ordNub l = go Set.empty l
+ where
+ go _ [] = []
+ go s (x:xs) = if x `Set.member` s then go s xs
+ else x : go (Set.insert x s) xs
+
+--
+-- Text processing
+--
+
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing _ [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+
+-- | Remove leading and trailing space (including newlines) from string.
+trim :: String -> String
+trim = triml . trimr
+
+-- | Remove leading space (including newlines) from string.
+triml :: String -> String
+triml = dropWhile (`elem` " \r\n\t")
+
+-- | Remove trailing space (including newlines) from string.
+trimr :: String -> String
+trimr = reverse . triml . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x
+ | x >= 4000 || x < 0 = "?"
+ | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
+ | x >= 900 = "CM" ++ toRomanNumeral (x - 900)
+ | x >= 500 = "D" ++ toRomanNumeral (x - 500)
+ | x >= 400 = "CD" ++ toRomanNumeral (x - 400)
+ | x >= 100 = "C" ++ toRomanNumeral (x - 100)
+ | x >= 90 = "XC" ++ toRomanNumeral (x - 90)
+ | x >= 50 = "L" ++ toRomanNumeral (x - 50)
+ | x >= 40 = "XL" ++ toRomanNumeral (x - 40)
+ | x >= 10 = "X" ++ toRomanNumeral (x - 10)
+ | x == 9 = "IX"
+ | x >= 5 = "V" ++ toRomanNumeral (x - 5)
+ | x == 4 = "IV"
+ | x >= 1 = "I" ++ toRomanNumeral (x - 1)
+ | otherwise = ""
+
+-- | Escape whitespace and some punctuation characters in URI.
+escapeURI :: String -> String
+escapeURI = escapeURIString (not . needsEscaping)
+ where needsEscaping c = isSpace c || c `elem`
+ ['<','>','|','"','{','}','[',']','^', '`']
+
+
+-- | Convert tabs to spaces and filter out DOS line endings.
+-- Tabs will be preserved if tab stop is set to 0.
+tabFilter :: Int -- ^ Tab stop
+ -> String -- ^ Input
+ -> String
+tabFilter tabStop =
+ let go _ [] = ""
+ go _ ('\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':'\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':xs) = '\n' : go tabStop xs
+ go spsToNextStop ('\t':xs) =
+ if tabStop == 0
+ then '\t' : go tabStop xs
+ else replicate spsToNextStop ' ' ++ go tabStop xs
+ go 1 (x:xs) =
+ x : go tabStop xs
+ go spsToNextStop (x:xs) =
+ x : go (spsToNextStop - 1) xs
+ in go tabStop
+
+--
+-- Date/time
+--
+
+-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
+-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
+-- or equal to 1583, but MS Word only accepts dates starting 1601).
+normalizeDate :: String -> Maybe String
+normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
+ (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
+ where rejectBadYear day = case toGregorian day of
+ (y, _, _) | y >= 1601 && y <= 9999 -> Just day
+ _ -> Nothing
+ parsetimeWith =
+#if MIN_VERSION_time(1,5,0)
+ parseTimeM True defaultTimeLocale
+#else
+ parseTime defaultTimeLocale
+#endif
+ formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
+ "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
+ "%Y%m%d", "%Y%m", "%Y"]
+
+--
+-- Pandoc block and inline list processing
+--
+
+-- | Generate infinite lazy list of markers for an ordered list,
+-- depending on list attributes.
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers (start, numstyle, numdelim) =
+ let singleton c = [c]
+ nums = case numstyle of
+ DefaultStyle -> map show [start..]
+ Example -> map show [start..]
+ Decimal -> map show [start..]
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
+ UpperRoman -> map toRomanNumeral [start..]
+ LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ inDelim str = case numdelim of
+ DefaultDelim -> str ++ "."
+ Period -> str ++ "."
+ OneParen -> str ++ ")"
+ TwoParens -> "(" ++ str ++ ")"
+ in map inDelim nums
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, collapse double @Space@s into singles, and
+-- remove empty Str elements.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
+ where cleanup [] = []
+ cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
+ [] -> []
+ (x:xs) -> Space : x : cleanup xs
+ cleanup ((Str ""):rest) = cleanup rest
+ cleanup (x:rest) = x : cleanup rest
+
+isSpaceOrEmpty :: Inline -> Bool
+isSpaceOrEmpty Space = True
+isSpaceOrEmpty (Str "") = True
+isSpaceOrEmpty _ = False
+
+-- | Extract the leading and trailing spaces from inside an inline element
+-- and place them outside the element. SoftBreaks count as Spaces for
+-- these purposes.
+extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
+extractSpaces f is =
+ let contents = B.unMany is
+ left = case viewl contents of
+ (Space :< _) -> B.space
+ (SoftBreak :< _) -> B.softbreak
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> B.space
+ (_ :> SoftBreak) -> B.softbreak
+ _ -> mempty in
+ (left <> f (B.trimInlines . B.Many $ contents) <> right)
+
+-- | Extract inlines, removing formatting.
+removeFormatting :: Walkable Inline a => a -> [Inline]
+removeFormatting = query go . walk deNote
+ where go :: Inline -> [Inline]
+ go (Str xs) = [Str xs]
+ go Space = [Space]
+ go SoftBreak = [SoftBreak]
+ go (Code _ x) = [Str x]
+ go (Math _ x) = [Str x]
+ go LineBreak = [Space]
+ go _ = []
+
+deNote :: Inline -> Inline
+deNote (Note _) = Str ""
+deNote x = x
+
+-- | Convert pandoc structure to a string with formatting removed.
+-- Footnotes are skipped (since we don't want their contents in link
+-- labels).
+stringify :: Walkable Inline a => a -> String
+stringify = query go . walk deNote
+ where go :: Inline -> [Char]
+ go Space = " "
+ go SoftBreak = " "
+ go (Str x) = x
+ go (Code _ x) = x
+ go (Math _ x) = x
+ go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
+ go LineBreak = " "
+ go _ = ""
+
+-- | Bring all regular text in a pandoc structure to uppercase.
+--
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+ where go :: Inline -> Inline
+ go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go x = x
+
+-- | Change final list item from @Para@ to @Plain@ if the list contains
+-- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather
+-- than @[Block]@.
+compactify :: [Blocks] -- ^ List of list items (each a list of blocks)
+ -> [Blocks]
+compactify [] = []
+compactify items =
+ let (others, final) = (init items, last items)
+ in case reverse (B.toList final) of
+ (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
+ -- if this is only Para, change to Plain
+ [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
+ _ -> items
+ _ -> items
+
+-- | Like @compactify@, but acts on items of definition lists.
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
+ let defs = concatMap snd items
+ in case reverse (concatMap B.toList defs) of
+ (Para x:xs)
+ | not (any isPara xs) ->
+ let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ if null lastDef
+ then [B.fromList lastDef]
+ else [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ | otherwise -> items
+ _ -> items
+
+-- | Combine a list of lines by adding hard linebreaks.
+combineLines :: [[Inline]] -> [Inline]
+combineLines = intercalate [LineBreak]
+
+-- | Convert a list of lines into a paragraph with hard line breaks. This is
+-- useful e.g. for rudimentary support of LineBlock elements in writers.
+linesToPara :: [[Inline]] -> Block
+linesToPara = Para . combineLines
+
+isPara :: Block -> Bool
+isPara (Para _) = True
+isPara _ = False
+
+-- | Data structure for defining hierarchical Pandoc documents
+data Element = Blk Block
+ | Sec Int [Int] Attr [Inline] [Element]
+ -- lvl num attributes label contents
+ deriving (Eq, Read, Show, Typeable, Data)
+
+instance Walkable Inline Element where
+ walk f (Blk x) = Blk (walk f x)
+ walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+ walkM f (Blk x) = Blk `fmap` walkM f x
+ walkM f (Sec lev nums attr ils elts) = do
+ ils' <- walkM f ils
+ elts' <- walkM f elts
+ return $ Sec lev nums attr ils' elts'
+ query f (Blk x) = query f x
+ query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+instance Walkable Block Element where
+ walk f (Blk x) = Blk (walk f x)
+ walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
+ walkM f (Blk x) = Blk `fmap` walkM f x
+ walkM f (Sec lev nums attr ils elts) = do
+ ils' <- walkM f ils
+ elts' <- walkM f elts
+ return $ Sec lev nums attr ils' elts'
+ query f (Blk x) = query f x
+ query f (Sec _ _ _ ils elts) = query f ils <> query f elts
+
+
+-- | Convert Pandoc inline list to plain text identifier. HTML
+-- identifiers must start with a letter, and may contain only
+-- letters, digits, and the characters _-.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier =
+ dropWhile (not . isAlpha) . intercalate "-" . words .
+ map (nbspToSp . toLower) .
+ filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
+ stringify
+ where nbspToSp '\160' = ' '
+ nbspToSp x = x
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
+hierarchicalize :: [Block] -> [Element]
+hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
+
+hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
+hierarchicalizeWithIds [] = return []
+hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
+ lastnum <- S.get
+ let lastnum' = take level lastnum
+ let newnum = case length lastnum' of
+ x | "unnumbered" `elem` classes -> []
+ | x >= level -> init lastnum' ++ [last lastnum' + 1]
+ | otherwise -> lastnum ++
+ replicate (level - length lastnum - 1) 0 ++ [1]
+ unless (null newnum) $ S.put newnum
+ let (sectionContents, rest) = break (headerLtEq level) xs
+ sectionContents' <- hierarchicalizeWithIds sectionContents
+ rest' <- hierarchicalizeWithIds rest
+ return $ Sec level newnum attr title' sectionContents' : rest'
+hierarchicalizeWithIds ((Div ("",["references"],[])
+ (Header level (ident,classes,kvs) title' : xs)):ys) =
+ hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
+ title') : (xs ++ ys))
+hierarchicalizeWithIds (x:rest) = do
+ rest' <- hierarchicalizeWithIds rest
+ return $ (Blk x) : rest'
+
+headerLtEq :: Int -> Block -> Bool
+headerLtEq level (Header l _ _) = l <= level
+headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
+headerLtEq _ _ = False
+
+-- | Generate a unique identifier from a list of inlines.
+-- Second argument is a list of already used identifiers.
+uniqueIdent :: [Inline] -> Set.Set String -> String
+uniqueIdent title' usedIdents
+ = let baseIdent = case inlineListToIdentifier title' of
+ "" -> "section"
+ x -> x
+ numIdent n = baseIdent ++ "-" ++ show n
+ in if baseIdent `Set.member` usedIdents
+ then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
+ Just x -> numIdent x
+ Nothing -> baseIdent -- if we have more than 60,000, allow repeats
+ else baseIdent
+
+-- | True if block is a Header block.
+isHeaderBlock :: Block -> Bool
+isHeaderBlock (Header _ _ _) = True
+isHeaderBlock _ = False
+
+-- | Shift header levels up or down.
+headerShift :: Int -> Pandoc -> Pandoc
+headerShift n = walk shift
+ where shift :: Block -> Block
+ shift (Header level attr inner) = Header (level + n) attr inner
+ shift x = x
+
+-- | Detect if a list is tight.
+isTightList :: [[Block]] -> Bool
+isTightList = all firstIsPlain
+ where firstIsPlain (Plain _ : _) = True
+ firstIsPlain _ = False
+
+-- | Set a field of a 'Meta' object. If the field already has a value,
+-- convert it into a list with the new value appended to the old value(s).
+addMetaField :: ToMetaValue a
+ => String
+ -> a
+ -> Meta
+ -> Meta
+addMetaField key val (Meta meta) =
+ Meta $ M.insertWith combine key (toMetaValue val) meta
+ where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
+ combine newval x = MetaList [x, newval]
+ tolist (MetaList ys) = ys
+ tolist y = [y]
+
+-- | Create 'Meta' from old-style title, authors, date. This is
+-- provided to ease the transition from the old API.
+makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
+makeMeta title authors date =
+ addMetaField "title" (B.fromList title)
+ $ addMetaField "author" (map B.fromList authors)
+ $ addMetaField "date" (B.fromList date)
+ $ nullMeta
+
+--
+-- TagSoup HTML handling
+--
+
+-- | Render HTML tags.
+renderTags' :: [Tag String] -> String
+renderTags' = renderTagsOptions
+ renderOptions{ optMinimize = matchTags ["hr", "br", "img",
+ "meta", "link"]
+ , optRawTag = matchTags ["script", "style"] }
+ where matchTags = \tags -> flip elem tags . map toLower
+
+--
+-- File handling
+--
+
+-- | Perform an IO action in a directory, returning to starting directory.
+inDirectory :: FilePath -> IO a -> IO a
+inDirectory path action = E.bracket
+ getCurrentDirectory
+ setCurrentDirectory
+ (const $ setCurrentDirectory path >> action)
+
+getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
+getDefaultReferenceDocx datadir = do
+ let paths = ["[Content_Types].xml",
+ "_rels/.rels",
+ "docProps/app.xml",
+ "docProps/core.xml",
+ "word/document.xml",
+ "word/fontTable.xml",
+ "word/footnotes.xml",
+ "word/numbering.xml",
+ "word/settings.xml",
+ "word/webSettings.xml",
+ "word/styles.xml",
+ "word/_rels/document.xml.rels",
+ "word/_rels/footnotes.xml.rels",
+ "word/theme/theme1.xml"]
+ let toLazy = fromChunks . (:[])
+ let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
+ getCurrentTime
+ contents <- toLazy <$> readDataFile datadir
+ ("docx/" ++ path)
+ return $ toEntry path epochtime contents
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- doesFileExist (d </> "reference.docx")
+ if exists
+ then return (Just (d </> "reference.docx"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> BL.readFile arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+getDefaultReferenceODT :: Maybe FilePath -> IO Archive
+getDefaultReferenceODT datadir = do
+ let paths = ["mimetype",
+ "manifest.rdf",
+ "styles.xml",
+ "content.xml",
+ "meta.xml",
+ "settings.xml",
+ "Configurations2/accelerator/current.xml",
+ "Thumbnails/thumbnail.png",
+ "META-INF/manifest.xml"]
+ let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+ contents <- (fromChunks . (:[])) `fmap`
+ readDataFile datadir ("odt/" ++ path)
+ return $ toEntry path epochtime contents
+ mbArchive <- case datadir of
+ Nothing -> return Nothing
+ Just d -> do
+ exists <- doesFileExist (d </> "reference.odt")
+ if exists
+ then return (Just (d </> "reference.odt"))
+ else return Nothing
+ case mbArchive of
+ Just arch -> toArchive <$> BL.readFile arch
+ Nothing -> foldr addEntryToArchive emptyArchive <$>
+ mapM pathToEntry paths
+
+
+readDefaultDataFile :: FilePath -> IO BS.ByteString
+readDefaultDataFile "reference.docx" =
+ (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
+readDefaultDataFile "reference.odt" =
+ (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+ case lookup (makeCanonical fname) dataFiles of
+ Nothing -> err 97 $ "Could not find data file " ++ fname
+ Just contents -> return contents
+ where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+ transformPathParts = reverse . foldl go []
+ go as "." = as
+ go (_:as) ".." = as
+ go as x = x : as
+#else
+ getDataFileName fname' >>= checkExistence >>= BS.readFile
+ where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+checkExistence :: FilePath -> IO FilePath
+checkExistence fn = do
+ exists <- doesFileExist fn
+ if exists
+ then return fn
+ else err 97 ("Could not find data file " ++ fn)
+#endif
+
+-- | Read file from specified user data directory or, if not found there, from
+-- Cabal data directory.
+readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
+readDataFile Nothing fname = readDefaultDataFile fname
+readDataFile (Just userDir) fname = do
+ exists <- doesFileExist (userDir </> fname)
+ if exists
+ then BS.readFile (userDir </> fname)
+ else readDefaultDataFile fname
+
+-- | Same as 'readDataFile' but returns a String instead of a ByteString.
+readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
+readDataFileUTF8 userDir fname =
+ UTF8.toString `fmap` readDataFile userDir fname
+
+-- | Read from a URL and return raw data and maybe mime type.
+openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
+openURL u
+ | Just u'' <- stripPrefix "data:" u =
+ let mime = takeWhile (/=',') u''
+ contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
+ in return $ Right (decodeLenient contents, Just mime)
+#ifdef HTTP_CLIENT
+ | otherwise = withSocketsDo $ E.try $ do
+ let parseReq = parseRequest
+ (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
+ (useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT"
+ req <- parseReq u
+ req' <- case proxy of
+ Left _ -> return req
+ Right pr -> (parseReq pr >>= \r ->
+ return $ addProxy (host r) (port r) req)
+ `mplus` return req
+ req'' <- case useragent of
+ Left _ -> return req'
+ Right ua -> do
+ let headers = requestHeaders req'
+ let useragentheader = (hUserAgent, B8.pack ua)
+ let headers' = useragentheader:headers
+ return $ req' {requestHeaders = headers'}
+ resp <- newManager tlsManagerSettings >>= httpLbs req''
+ return (BS.concat $ toChunks $ responseBody resp,
+ UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
+#else
+ | otherwise = E.try $ getBodyAndMimeType `fmap` browse
+ (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
+ setOutHandler $ const (return ())
+ setAllowRedirects True
+ request (getRequest' u'))
+ where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
+ getRequest' uriString = case parseURI uriString of
+ Nothing -> error ("Not a valid URL: " ++
+ uriString)
+ Just v -> mkRequest GET v
+ u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI
+#endif
+
+--
+-- Error reporting
+--
+
+err :: MonadIO m => Int -> String -> m a
+err exitCode msg = liftIO $ do
+ UTF8.hPutStrLn stderr msg
+ exitWith $ ExitFailure exitCode
+ return undefined
+
+warn :: MonadIO m => String -> m ()
+warn msg = liftIO $ do
+ UTF8.hPutStrLn stderr $ "[warning] " ++ msg
+
+mapLeft :: (a -> b) -> Either a c -> Either b c
+mapLeft f (Left x) = Left (f x)
+mapLeft _ (Right x) = Right x
+
+-- | Remove intermediate "." and ".." directories from a path.
+--
+-- > collapseFilePath "./foo" == "foo"
+-- > collapseFilePath "/bar/../baz" == "/baz"
+-- > collapseFilePath "/../baz" == "/../baz"
+-- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
+-- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
+-- > collapseFilePath "parent/foo/.." == "parent"
+-- > collapseFilePath "/parent/foo/../../bar" == "/bar"
+collapseFilePath :: FilePath -> FilePath
+collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
+ where
+ go rs "." = rs
+ go r@(p:rs) ".." = case p of
+ ".." -> ("..":r)
+ (checkPathSeperator -> Just True) -> ("..":r)
+ _ -> rs
+ go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
+ go rs x = x:rs
+ isSingleton [] = Nothing
+ isSingleton [x] = Just x
+ isSingleton _ = Nothing
+ checkPathSeperator = fmap isPathSeparator . isSingleton
+
+--
+-- File selection from the archive
+--
+filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
+filteredFilesFromArchive zf f =
+ mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
+ where
+ fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
+ fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
+
+---
+--- Squash blocks into inlines
+---
+
+blockToInlines :: Block -> [Inline]
+blockToInlines (Plain ils) = ils
+blockToInlines (Para ils) = ils
+blockToInlines (LineBlock lns) = combineLines lns
+blockToInlines (CodeBlock attr str) = [Code attr str]
+blockToInlines (RawBlock fmt str) = [RawInline fmt str]
+blockToInlines (BlockQuote blks) = blocksToInlines blks
+blockToInlines (OrderedList _ blkslst) =
+ concatMap blocksToInlines blkslst
+blockToInlines (BulletList blkslst) =
+ concatMap blocksToInlines blkslst
+blockToInlines (DefinitionList pairslst) =
+ concatMap f pairslst
+ where
+ f (ils, blkslst) = ils ++
+ [Str ":", Space] ++
+ (concatMap blocksToInlines blkslst)
+blockToInlines (Header _ _ ils) = ils
+blockToInlines (HorizontalRule) = []
+blockToInlines (Table _ _ _ headers rows) =
+ intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
+ where
+ tbl = headers : rows
+blockToInlines (Div _ blks) = blocksToInlines blks
+blockToInlines Null = []
+
+blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
+blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
+
+blocksToInlines :: [Block] -> [Inline]
+blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
+
+
+--
+-- Safe read
+--
+
+safeRead :: (MonadPlus m, Read a) => String -> m a
+safeRead s = case reads s of
+ (d,x):_
+ | all isSpace x -> return d
+ _ -> mzero
+
+--
+-- Temp directory
+--
+
+withTempDir :: String -> (FilePath -> IO a) -> IO a
+withTempDir =
+#ifdef _WINDOWS
+ withTempDirectory "."
+#else
+ withSystemTempDirectory
+#endif
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
new file mode 100644
index 000000000..e19dba3e2
--- /dev/null
+++ b/src/Text/Pandoc/Slides.hs
@@ -0,0 +1,63 @@
+{-
+Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Slides
+ Copyright : Copyright (C) 2012-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions for splitting documents into slides for slide
+show formats (dzslides, revealjs, s5, slidy, slideous, beamer).
+-}
+module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where
+import Text.Pandoc.Definition
+
+-- | Find level of header that starts slides (defined as the least header
+-- level that occurs before a non-header/non-hrule in the blocks).
+getSlideLevel :: [Block] -> Int
+getSlideLevel = go 6
+ where go least (Header n _ _ : x : xs)
+ | n < least && nonHOrHR x = go n xs
+ | otherwise = go least (x:xs)
+ go least (_ : xs) = go least xs
+ go least [] = least
+ nonHOrHR (Header{}) = False
+ nonHOrHR (HorizontalRule) = False
+ nonHOrHR _ = True
+
+-- | Prepare a block list to be passed to hierarchicalize.
+prepSlides :: Int -> [Block] -> [Block]
+prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader
+ where splitHrule (HorizontalRule : Header n attr xs : ys)
+ | n == slideLevel = Header slideLevel attr xs : splitHrule ys
+ splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] :
+ splitHrule xs
+ splitHrule (x : xs) = x : splitHrule xs
+ splitHrule [] = []
+ extractRefsHeader bs =
+ case reverse bs of
+ (Div ("",["references"],[]) (Header n attrs xs : ys) : zs)
+ -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys])
+ _ -> bs
+ ensureStartWithH bs@(Header n _ _:_)
+ | n <= slideLevel = bs
+ ensureStartWithH bs = Header slideLevel nullAttr [Str "\0"] : bs
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
new file mode 100644
index 000000000..705ac54c9
--- /dev/null
+++ b/src/Text/Pandoc/Templates.hs
@@ -0,0 +1,77 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances,
+ OverloadedStrings, GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Templates
+ Copyright : Copyright (C) 2009-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A simple templating system with variable substitution and conditionals.
+
+-}
+
+module Text.Pandoc.Templates ( renderTemplate
+ , renderTemplate'
+ , TemplateTarget
+ , varListToJSON
+ , compileTemplate
+ , Template
+ , getDefaultTemplate ) where
+
+import Text.DocTemplates (Template, TemplateTarget, compileTemplate,
+ renderTemplate, applyTemplate,
+ varListToJSON)
+import Data.Aeson (ToJSON(..))
+import qualified Data.Text as T
+import System.FilePath ((</>), (<.>))
+import qualified Control.Exception.Extensible as E (try, IOException)
+import Text.Pandoc.Shared (readDataFileUTF8)
+
+-- | Get default template for the specified writer.
+getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
+ -> String -- ^ Name of writer
+ -> IO (Either E.IOException String)
+getDefaultTemplate user writer = do
+ let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
+ case format of
+ "native" -> return $ Right ""
+ "json" -> return $ Right ""
+ "docx" -> return $ Right ""
+ "fb2" -> return $ Right ""
+ "odt" -> getDefaultTemplate user "opendocument"
+ "html" -> getDefaultTemplate user "html5"
+ "docbook" -> getDefaultTemplate user "docbook5"
+ "epub" -> getDefaultTemplate user "epub3"
+ "markdown_strict" -> getDefaultTemplate user "markdown"
+ "multimarkdown" -> getDefaultTemplate user "markdown"
+ "markdown_github" -> getDefaultTemplate user "markdown"
+ "markdown_mmd" -> getDefaultTemplate user "markdown"
+ "markdown_phpextra" -> getDefaultTemplate user "markdown"
+ _ -> let fname = "templates" </> "default" <.> format
+ in E.try $ readDataFileUTF8 user fname
+
+-- | Like 'applyTemplate', but raising an error if compilation fails.
+renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b
+renderTemplate' template = either error id . applyTemplate (T.pack template)
+
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..62a662029
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,121 @@
+{-
+Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UTF8
+ Copyright : Copyright (C) 2010-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
+-}
+module Text.Pandoc.UTF8 ( readFile
+ , writeFile
+ , getContents
+ , putStr
+ , putStrLn
+ , hPutStr
+ , hPutStrLn
+ , hGetContents
+ , toString
+ , fromString
+ , toStringLazy
+ , fromStringLazy
+ , encodePath
+ , decodeArg
+ )
+
+where
+
+import System.IO hiding (readFile, writeFile, getContents,
+ putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
+import qualified System.IO as IO
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text.Encoding as T
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Encoding as TL
+
+readFile :: FilePath -> IO String
+readFile f = do
+ h <- openFile (encodePath f) ReadMode
+ hGetContents h
+
+writeFile :: FilePath -> String -> IO ()
+writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s
+
+getContents :: IO String
+getContents = hGetContents stdin
+
+putStr :: String -> IO ()
+putStr s = hPutStr stdout s
+
+putStrLn :: String -> IO ()
+putStrLn s = hPutStrLn stdout s
+
+hPutStr :: Handle -> String -> IO ()
+hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s
+
+hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
+
+hGetContents :: Handle -> IO String
+hGetContents = fmap toString . B.hGetContents
+-- hGetContents h = hSetEncoding h utf8_bom
+-- >> hSetNewlineMode h universalNewlineMode
+-- >> IO.hGetContents h
+
+-- | Drop BOM (byte order marker) if present at beginning of string.
+-- Note that Data.Text converts the BOM to code point FEFF, zero-width
+-- no-break space, so if the string begins with this we strip it off.
+dropBOM :: String -> String
+dropBOM ('\xFEFF':xs) = xs
+dropBOM xs = xs
+
+filterCRs :: String -> String
+filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
+filterCRs ('\r':xs) = '\n' : filterCRs xs
+filterCRs (x:xs) = x : filterCRs xs
+filterCRs [] = []
+
+-- | Convert UTF8-encoded ByteString to String, also
+-- removing '\r' characters.
+toString :: B.ByteString -> String
+toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
+
+fromString :: String -> B.ByteString
+fromString = T.encodeUtf8 . T.pack
+
+-- | Convert UTF8-encoded ByteString to String, also
+-- removing '\r' characters.
+toStringLazy :: BL.ByteString -> String
+toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
+
+fromStringLazy :: String -> BL.ByteString
+fromStringLazy = TL.encodeUtf8 . TL.pack
+
+encodePath :: FilePath -> FilePath
+encodePath = id
+
+decodeArg :: String -> String
+decodeArg = id
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
new file mode 100644
index 000000000..8de102742
--- /dev/null
+++ b/src/Text/Pandoc/UUID.hs
@@ -0,0 +1,78 @@
+{-
+Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UUID
+ Copyright : Copyright (C) 2010-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UUID generation using Version 4 (random method) described
+in RFC4122. See http://tools.ietf.org/html/rfc4122
+-}
+
+module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where
+
+import Text.Printf ( printf )
+import System.Random ( RandomGen, randoms, getStdGen )
+import Data.Word
+import Data.Bits ( setBit, clearBit )
+
+data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+ Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+
+instance Show UUID where
+ show (UUID a b c d e f g h i j k l m n o p) =
+ "urn:uuid:" ++
+ printf "%02x" a ++
+ printf "%02x" b ++
+ printf "%02x" c ++
+ printf "%02x" d ++
+ "-" ++
+ printf "%02x" e ++
+ printf "%02x" f ++
+ "-" ++
+ printf "%02x" g ++
+ printf "%02x" h ++
+ "-" ++
+ printf "%02x" i ++
+ printf "%02x" j ++
+ "-" ++
+ printf "%02x" k ++
+ printf "%02x" l ++
+ printf "%02x" m ++
+ printf "%02x" n ++
+ printf "%02x" o ++
+ printf "%02x" p
+
+getUUID :: RandomGen g => g -> UUID
+getUUID gen =
+ let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8]
+ -- set variant
+ i' = i `setBit` 7 `clearBit` 6
+ -- set version (0100 for random)
+ g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
+ in
+ UUID a b c d e f g' h i' j k l m n o p
+
+getRandomUUID :: IO UUID
+getRandomUUID = getUUID <$> getStdGen
+
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
new file mode 100644
index 000000000..356b29504
--- /dev/null
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -0,0 +1,470 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.AsciiDoc
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to asciidoc.
+
+Note that some information may be lost in conversion, due to
+expressive limitations of asciidoc. Footnotes and table cells with
+paragraphs (or other block items) are not possible in asciidoc.
+If pandoc encounters one of these, it will insert a message indicating
+that it has omitted the construct.
+
+AsciiDoc: <http://www.methods.co.nz/asciidoc/>
+-}
+module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, space)
+import Data.Maybe (fromMaybe)
+import Data.List ( stripPrefix, intersperse, intercalate )
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import Control.Monad.State
+import qualified Data.Map as M
+import Data.Aeson (Value(String), fromJSON, toJSON, Result(..))
+import qualified Data.Text as T
+import Data.Char (isSpace, isPunctuation)
+import Text.Pandoc.Class (PandocMonad)
+
+data WriterState = WriterState { defListMarker :: String
+ , orderedListLevel :: Int
+ , bulletListLevel :: Int
+ , intraword :: Bool
+ }
+
+-- | Convert Pandoc to AsciiDoc.
+writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeAsciiDoc opts document = return $
+ evalState (pandocToAsciiDoc opts document) WriterState{
+ defListMarker = "::"
+ , orderedListLevel = 1
+ , bulletListLevel = 1
+ , intraword = False
+ }
+
+-- | Return asciidoc representation of document.
+pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String
+pandocToAsciiDoc opts (Pandoc meta blocks) = do
+ let titleblock = not $ null (docTitle meta) && null (docAuthors meta) &&
+ null (docDate meta)
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToAsciiDoc opts)
+ (fmap (render colwidth) . inlineListToAsciiDoc opts)
+ meta
+ let addTitleLine (String t) = String $
+ t <> "\n" <> T.replicate (T.length t) "="
+ addTitleLine x = x
+ let metadata' = case fromJSON metadata of
+ Success m -> toJSON $ M.adjust addTitleLine
+ ("title" :: T.Text) m
+ _ -> metadata
+ body <- blockListToAsciiDoc opts blocks
+ let main = render colwidth body
+ let context = defField "body" main
+ $ defField "toc"
+ (writerTableOfContents opts &&
+ writerTemplate opts /= Nothing)
+ $ defField "titleblock" titleblock
+ $ metadata'
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Escape special characters for AsciiDoc.
+escapeString :: String -> String
+escapeString = escapeStringUsing escs
+ where escs = backslashEscapes "{"
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: Parser [Char] ParserState Char
+olMarker = do (start, style', delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style' == UpperAlpha || (style' == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case runParser olMarker defaultParserState "para start" (take 10 str) of
+ Left _ -> False
+ Right _ -> True
+
+-- | Convert Pandoc block element to asciidoc.
+blockToAsciiDoc :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToAsciiDoc _ Null = return empty
+blockToAsciiDoc opts (Plain inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ return $ contents <> blankline
+blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
+ blockToAsciiDoc opts (Para [Image attr alt (src,tit)])
+blockToAsciiDoc opts (Para inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ -- escape if para starts with ordered list marker
+ let esc = if beginsWithOrderedListMarker (render Nothing contents)
+ then text "\\"
+ else empty
+ return $ esc <> contents <> blankline
+blockToAsciiDoc opts (LineBlock lns) = do
+ let docify line = if null line
+ then return blankline
+ else inlineListToAsciiDoc opts line
+ let joinWithLinefeeds = nowrap . mconcat . intersperse cr
+ contents <- joinWithLinefeeds <$> mapM docify lns
+ return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline
+blockToAsciiDoc _ (RawBlock f s)
+ | f == "asciidoc" = return $ text s
+ | otherwise = return empty
+blockToAsciiDoc _ HorizontalRule =
+ return $ blankline <> text "'''''" <> blankline
+blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ let len = offset contents
+ -- ident seem to be empty most of the time and asciidoc will generate them automatically
+ -- so lets make them not show up when null
+ let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ let setext = writerSetextHeaders opts
+ return $
+ (if setext
+ then
+ identifier $$ contents $$
+ (case level of
+ 1 -> text $ replicate len '-'
+ 2 -> text $ replicate len '~'
+ 3 -> text $ replicate len '^'
+ 4 -> text $ replicate len '+'
+ _ -> empty) <> blankline
+ else
+ identifier $$ text (replicate level '=') <> space <> contents <> blankline)
+blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $
+ if null classes
+ then "...." $$ text str $$ "...."
+ else attrs $$ "----" $$ text str $$ "----")
+ <> blankline
+ where attrs = "[" <> text (intercalate "," ("source" : classes)) <> "]"
+blockToAsciiDoc opts (BlockQuote blocks) = do
+ contents <- blockListToAsciiDoc opts blocks
+ let isBlock (BlockQuote _) = True
+ isBlock _ = False
+ -- if there are nested block quotes, put in an open block
+ let contents' = if any isBlock blocks
+ then "--" $$ contents $$ "--"
+ else contents
+ let cols = offset contents'
+ let bar = text $ replicate cols '_'
+ return $ bar $$ chomp contents' $$ bar <> blankline
+blockToAsciiDoc opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToAsciiDoc opts caption
+ let caption'' = if null caption
+ then empty
+ else "." <> caption' <> cr
+ let isSimple = all (== 0) widths
+ let relativePercentWidths = if isSimple
+ then widths
+ else map (/ (sum widths)) widths
+ let widths'' :: [Integer]
+ widths'' = map (floor . (* 100)) relativePercentWidths
+ -- ensure that the widths sum to 100
+ let widths' = case widths'' of
+ _ | isSimple -> widths''
+ (w:ws) | sum (w:ws) < 100
+ -> (100 - sum ws) : ws
+ ws -> ws
+ let totalwidth :: Integer
+ totalwidth = floor $ sum widths * 100
+ let colspec al wi = (case al of
+ AlignLeft -> "<"
+ AlignCenter -> "^"
+ AlignRight -> ">"
+ AlignDefault -> "") ++
+ if wi == 0 then "" else (show wi ++ "%")
+ let headerspec = if all null headers
+ then empty
+ else text "options=\"header\","
+ let widthspec = if totalwidth == 0
+ then empty
+ else text "width="
+ <> doubleQuotes (text $ show totalwidth ++ "%")
+ <> text ","
+ let tablespec = text "["
+ <> widthspec
+ <> text "cols="
+ <> doubleQuotes (text $ intercalate ","
+ $ zipWith colspec aligns widths')
+ <> text ","
+ <> headerspec <> text "]"
+ let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
+ return $ text "|" <> chomp d
+ makeCell [Para x] = makeCell [Plain x]
+ makeCell [] = return $ text "|"
+ makeCell bs = do d <- blockListToAsciiDoc opts bs
+ return $ text "a|" $$ d
+ let makeRow cells = hsep `fmap` mapM makeCell cells
+ rows' <- mapM makeRow rows
+ head' <- makeRow headers
+ let head'' = if all null headers then empty else head'
+ let colwidth = if writerWrapText opts == WrapAuto
+ then writerColumns opts
+ else 100000
+ let maxwidth = maximum $ map offset (head':rows')
+ let body = if maxwidth > colwidth then vsep rows' else vcat rows'
+ let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '='
+ return $
+ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
+blockToAsciiDoc opts (BulletList items) = do
+ contents <- mapM (bulletListItemToAsciiDoc opts) items
+ return $ cat contents <> blankline
+blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
+ let sty' = case sty of
+ UpperRoman -> UpperAlpha
+ LowerRoman -> LowerAlpha
+ x -> x
+ let markers = orderedListMarkers (1, sty', Period) -- start num not used
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $
+ zip markers' items
+ return $ cat contents <> blankline
+blockToAsciiDoc opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToAsciiDoc opts) items
+ return $ cat contents <> blankline
+blockToAsciiDoc opts (Div (ident,_,_) bs) = do
+ let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ contents <- blockListToAsciiDoc opts bs
+ return $ identifier $$ contents
+
+-- | Convert bullet list item (list of blocks) to asciidoc.
+bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToAsciiDoc opts blocks = do
+ let addBlock :: Doc -> Block -> State WriterState Doc
+ addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
+ addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> text "+" <> cr <> chomp x
+ lev <- bulletListLevel `fmap` get
+ modify $ \s -> s{ bulletListLevel = lev + 1 }
+ contents <- foldM addBlock empty blocks
+ modify $ \s -> s{ bulletListLevel = lev }
+ let marker = text (replicate lev '*')
+ return $ marker <> text " " <> contents <> cr
+
+-- | Convert ordered list item (a list of blocks) to asciidoc.
+orderedListItemToAsciiDoc :: WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToAsciiDoc opts marker blocks = do
+ let addBlock :: Doc -> Block -> State WriterState Doc
+ addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
+ addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> text "+" <> cr <> chomp x
+ lev <- orderedListLevel `fmap` get
+ modify $ \s -> s{ orderedListLevel = lev + 1 }
+ contents <- foldM addBlock empty blocks
+ modify $ \s -> s{ orderedListLevel = lev }
+ return $ text marker <> text " " <> contents <> cr
+
+-- | Convert definition list item (label, list of blocks) to asciidoc.
+definitionListItemToAsciiDoc :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> State WriterState Doc
+definitionListItemToAsciiDoc opts (label, defs) = do
+ labelText <- inlineListToAsciiDoc opts label
+ marker <- defListMarker `fmap` get
+ if marker == "::"
+ then modify (\st -> st{ defListMarker = ";;"})
+ else modify (\st -> st{ defListMarker = "::"})
+ let divider = cr <> text "+" <> cr
+ let defsToAsciiDoc :: [Block] -> State WriterState Doc
+ defsToAsciiDoc ds = (vcat . intersperse divider . map chomp)
+ `fmap` mapM (blockToAsciiDoc opts) ds
+ defs' <- mapM defsToAsciiDoc defs
+ modify (\st -> st{ defListMarker = marker })
+ let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs'
+ return $ labelText <> text marker <> cr <> contents <> cr
+
+-- | Convert list of Pandoc block elements to asciidoc.
+blockListToAsciiDoc :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
+
+data SpacyLocation = End | Start
+
+-- | Convert list of Pandoc inline elements to asciidoc.
+inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToAsciiDoc opts lst = do
+ oldIntraword <- gets intraword
+ setIntraword False
+ result <- go lst
+ setIntraword oldIntraword
+ return result
+ where go [] = return empty
+ go (y:x:xs)
+ | not (isSpacy End y) = do
+ y' <- if isSpacy Start x
+ then inlineToAsciiDoc opts y
+ else withIntraword $ inlineToAsciiDoc opts y
+ x' <- withIntraword $ inlineToAsciiDoc opts x
+ xs' <- go xs
+ return (y' <> x' <> xs')
+ | not (isSpacy Start x) = do
+ y' <- withIntraword $ inlineToAsciiDoc opts y
+ xs' <- go (x:xs)
+ return (y' <> xs')
+ go (x:xs) = do
+ x' <- inlineToAsciiDoc opts x
+ xs' <- go xs
+ return (x' <> xs')
+ isSpacy :: SpacyLocation -> Inline -> Bool
+ isSpacy _ Space = True
+ isSpacy _ LineBreak = True
+ isSpacy _ SoftBreak = True
+ -- Note that \W characters count as spacy in AsciiDoc
+ -- for purposes of determining interword:
+ isSpacy End (Str xs) = case reverse xs of
+ c:_ -> isPunctuation c || isSpace c
+ _ -> False
+ isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c
+ isSpacy _ _ = False
+
+setIntraword :: Bool -> State WriterState ()
+setIntraword b = modify $ \st -> st{ intraword = b }
+
+withIntraword :: State WriterState a -> State WriterState a
+withIntraword p = setIntraword True *> p <* setIntraword False
+
+-- | Convert Pandoc inline element to asciidoc.
+inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
+inlineToAsciiDoc opts (Emph lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ isIntraword <- gets intraword
+ let marker = if isIntraword then "__" else "_"
+ return $ marker <> contents <> marker
+inlineToAsciiDoc opts (Strong lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ isIntraword <- gets intraword
+ let marker = if isIntraword then "**" else "*"
+ return $ marker <> contents <> marker
+inlineToAsciiDoc opts (Strikeout lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "[line-through]*" <> contents <> "*"
+inlineToAsciiDoc opts (Superscript lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "^" <> contents <> "^"
+inlineToAsciiDoc opts (Subscript lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "~" <> contents <> "~"
+inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Quoted SingleQuote lst) =
+ inlineListToAsciiDoc opts (Str "`" : lst ++ [Str "'"])
+inlineToAsciiDoc opts (Quoted DoubleQuote lst) =
+ inlineListToAsciiDoc opts (Str "``" : lst ++ [Str "''"])
+inlineToAsciiDoc _ (Code _ str) = return $
+ text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`"
+inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str
+inlineToAsciiDoc _ (Math InlineMath str) =
+ return $ "latexmath:[$" <> text str <> "$]"
+inlineToAsciiDoc _ (Math DisplayMath str) =
+ return $ "latexmath:[\\[" <> text str <> "\\]]"
+inlineToAsciiDoc _ (RawInline f s)
+ | f == "asciidoc" = return $ text s
+ | otherwise = return empty
+inlineToAsciiDoc _ LineBreak = return $ " +" <> cr
+inlineToAsciiDoc _ Space = return space
+inlineToAsciiDoc opts SoftBreak =
+ case writerWrapText opts of
+ WrapAuto -> return space
+ WrapPreserve -> return cr
+ WrapNone -> return space
+inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do
+-- relative: link:downloads/foo.zip[download foo.zip]
+-- abs: http://google.cod[Google]
+-- or my@email.com[email john]
+ linktext <- inlineListToAsciiDoc opts txt
+ let isRelative = ':' `notElem` src
+ let prefix = if isRelative
+ then text "link:"
+ else empty
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
+ let useAuto = case txt of
+ [Str s] | escapeURI s == srcSuffix -> True
+ _ -> False
+ return $ if useAuto
+ then text srcSuffix
+ else prefix <> text src <> "[" <> linktext <> "]"
+inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do
+-- image:images/logo.png[Company logo, title="blah"]
+ let txt = if (null alternate) || (alternate == [Str ""])
+ then [Str "image"]
+ else alternate
+ linktext <- inlineListToAsciiDoc opts txt
+ let linktitle = if null tit
+ then empty
+ else ",title=\"" <> text tit <> "\""
+ showDim dir = case (dimension dir attr) of
+ Just (Percent a) ->
+ ["scaledwidth=" <> text (show (Percent a))]
+ Just dim ->
+ [text (show dir) <> "=" <> text (showInPixel opts dim)]
+ Nothing ->
+ []
+ dimList = showDim Width ++ showDim Height
+ dims = if null dimList
+ then empty
+ else "," <> cat (intersperse "," dimList)
+ return $ "image:" <> text src <> "[" <> linktext <> linktitle <> dims <> "]"
+inlineToAsciiDoc opts (Note [Para inlines]) =
+ inlineToAsciiDoc opts (Note [Plain inlines])
+inlineToAsciiDoc opts (Note [Plain inlines]) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ return $ text "footnote:[" <> contents <> "]"
+-- asciidoc can't handle blank lines in notes
+inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
+inlineToAsciiDoc opts (Span (ident,_,_) ils) = do
+ let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ contents <- inlineListToAsciiDoc opts ils
+ return $ identifier <> contents
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
new file mode 100644
index 000000000..b83f6785d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -0,0 +1,190 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to CommonMark.
+
+CommonMark: <http://commonmark.org>
+-}
+module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
+
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (isTightList, linesToPara)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import CMark
+import qualified Data.Text as T
+import Control.Monad.State (runState, State, modify, get)
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Class (PandocMonad)
+import Data.Foldable (foldrM)
+
+-- | Convert Pandoc to CommonMark.
+writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeCommonMark opts (Pandoc meta blocks) = do
+ let (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ main <- blocksToCommonMark opts (blocks' ++ notes')
+ metadata <- metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ let context = defField "body" main $ metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+processNotes :: Inline -> State [[Block]] Inline
+processNotes (Note bs) = do
+ modify (bs :)
+ notes <- get
+ return $ Str $ "[" ++ show (length notes) ++ "]"
+processNotes x = return x
+
+node :: NodeType -> [Node] -> Node
+node = Node Nothing
+
+blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m String
+blocksToCommonMark opts bs = do
+ let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ nodes <- blocksToNodes bs
+ return $
+ T.unpack $
+ nodeToCommonmark cmarkOpts colwidth $
+ node DOCUMENT nodes
+
+inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m String
+inlinesToCommonMark opts ils = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node PARAGRAPH (inlinesToNodes ils)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+
+blocksToNodes :: PandocMonad m => [Block] -> m [Node]
+blocksToNodes = foldrM blockToNodes []
+
+blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node]
+blockToNodes (Plain xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns)
+blockToNodes (Para xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns)
+blockToNodes (LineBlock lns) ns = blockToNodes (linesToPara lns) ns
+blockToNodes (CodeBlock (_,classes,_) xs) ns = return $
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns)
+blockToNodes (RawBlock fmt xs) ns
+ | fmt == Format "html" = return (node (HTML_BLOCK (T.pack xs)) [] : ns)
+ | otherwise = return (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] : ns)
+blockToNodes (BlockQuote bs) ns = do
+ nodes <- blocksToNodes bs
+ return (node BLOCK_QUOTE nodes : ns)
+blockToNodes (BulletList items) ns = do
+ nodes <- mapM blocksToNodes items
+ return (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM) nodes) : ns)
+blockToNodes (OrderedList (start, _sty, delim) items) ns = do
+ nodes <- mapM blocksToNodes items
+ return (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM) nodes) : ns)
+blockToNodes HorizontalRule ns = return (node THEMATIC_BREAK [] : ns)
+blockToNodes (Header lev _ ils) ns = return (node (HEADING lev) (inlinesToNodes ils) : ns)
+blockToNodes (Div _ bs) ns = do
+ nodes <- blocksToNodes bs
+ return (nodes ++ ns)
+blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns
+ where items' = map dlToBullet items
+ dlToBullet (term, ((Para xs : ys) : zs)) =
+ Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, ((Plain xs : ys) : zs)) =
+ Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, xs) =
+ Para term : concat xs
+blockToNodes t@(Table _ _ _ _ _) ns = do
+ s <- writeHtml5String def $! Pandoc nullMeta [t]
+ return (node (HTML_BLOCK (T.pack $! s)) [] : ns)
+blockToNodes Null ns = return ns
+
+inlinesToNodes :: [Inline] -> [Node]
+inlinesToNodes = foldr inlineToNodes []
+
+inlineToNodes :: Inline -> [Node] -> [Node]
+inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
+inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes LineBreak = (node LINEBREAK [] :)
+inlineToNodes SoftBreak = (node SOFTBREAK [] :)
+inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
+inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
+inlineToNodes (Strikeout xs) =
+ ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++
+ [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+inlineToNodes (Superscript xs) =
+ ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++
+ [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
+inlineToNodes (Subscript xs) =
+ ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++
+ [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
+inlineToNodes (SmallCaps xs) =
+ ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) []
+ : inlinesToNodes xs ++
+ [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
+inlineToNodes (Link _ ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (Image _ ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (RawInline fmt xs)
+ | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :)
+ | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :)
+inlineToNodes (Quoted qt ils) =
+ ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+ where (start, end) = case qt of
+ SingleQuote -> (T.pack "‘", T.pack "’")
+ DoubleQuote -> (T.pack "“", T.pack "”")
+inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes (Math mt str) =
+ case mt of
+ InlineMath ->
+ (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Note _) = id -- should not occur
+-- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
new file mode 100644
index 000000000..ea8b90db3
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -0,0 +1,481 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ConTeXt
+ Copyright : Copyright (C) 2007-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into ConTeXt.
+-}
+module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Walk (query)
+import Text.Printf ( printf )
+import Data.List ( intercalate, intersperse )
+import Data.Char ( ord )
+import Data.Maybe ( catMaybes )
+import Control.Monad.State
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates ( renderTemplate' )
+import Network.URI ( isURI, unEscapeString )
+import Text.Pandoc.Class (PandocMonad)
+
+data WriterState =
+ WriterState { stNextRef :: Int -- number of next URL reference
+ , stOrderedListLevel :: Int -- level of ordered list
+ , stOptions :: WriterOptions -- writer options
+ }
+
+orderedListStyles :: [Char]
+orderedListStyles = cycle "narg"
+
+-- | Convert Pandoc to ConTeXt.
+writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeConTeXt options document = return $
+ let defaultWriterState = WriterState { stNextRef = 1
+ , stOrderedListLevel = 0
+ , stOptions = options
+ }
+ in evalState (pandocToConTeXt options document) defaultWriterState
+
+pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
+pandocToConTeXt options (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText options == WrapAuto
+ then Just $ writerColumns options
+ else Nothing
+ metadata <- metaToJSON options
+ (fmap (render colwidth) . blockListToConTeXt)
+ (fmap (render colwidth) . inlineListToConTeXt)
+ meta
+ body <- mapM (elementToConTeXt options) $ hierarchicalize blocks
+ let main = (render colwidth . vcat) body
+ let layoutFromMargins = intercalate [','] $ catMaybes $
+ map (\(x,y) ->
+ ((x ++ "=") ++) <$> getField y metadata)
+ [("leftmargin","margin-left")
+ ,("rightmargin","margin-right")
+ ,("top","margin-top")
+ ,("bottom","margin-bottom")
+ ]
+ let context = defField "toc" (writerTableOfContents options)
+ $ defField "placelist" (intercalate ("," :: String) $
+ take (writerTOCDepth options +
+ case writerTopLevelDivision options of
+ TopLevelPart -> 0
+ TopLevelChapter -> 0
+ _ -> 1)
+ ["chapter","section","subsection","subsubsection",
+ "subsubsubsection","subsubsubsubsection"])
+ $ defField "body" main
+ $ defField "layout" layoutFromMargins
+ $ defField "number-sections" (writerNumberSections options)
+ $ metadata
+ let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
+ getField "lang" context)
+ $ defField "context-dir" (toContextDir $ getField "dir" context)
+ $ context
+ return $ case writerTemplate options of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context'
+
+toContextDir :: Maybe String -> String
+toContextDir (Just "rtl") = "r2l"
+toContextDir (Just "ltr") = "l2r"
+toContextDir _ = ""
+
+-- | escape things as needed for ConTeXt
+escapeCharForConTeXt :: WriterOptions -> Char -> String
+escapeCharForConTeXt opts ch =
+ let ligatures = isEnabled Ext_smart opts in
+ case ch of
+ '{' -> "\\{"
+ '}' -> "\\}"
+ '\\' -> "\\letterbackslash{}"
+ '$' -> "\\$"
+ '|' -> "\\letterbar{}"
+ '%' -> "\\letterpercent{}"
+ '~' -> "\\lettertilde{}"
+ '#' -> "\\#"
+ '[' -> "{[}"
+ ']' -> "{]}"
+ '\160' -> "~"
+ '\x2014' | ligatures -> "---"
+ '\x2013' | ligatures -> "--"
+ '\x2019' | ligatures -> "'"
+ '\x2026' -> "\\ldots{}"
+ x -> [x]
+
+-- | Escape string for ConTeXt
+stringToConTeXt :: WriterOptions -> String -> String
+stringToConTeXt opts = concatMap (escapeCharForConTeXt opts)
+
+-- | Sanitize labels
+toLabel :: String -> String
+toLabel z = concatMap go z
+ where go x
+ | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x)
+ | otherwise = [x]
+
+-- | Convert Elements to ConTeXt
+elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
+elementToConTeXt _ (Blk block) = blockToConTeXt block
+elementToConTeXt opts (Sec level _ attr title' elements) = do
+ header' <- sectionHeader attr level title'
+ innerContents <- mapM (elementToConTeXt opts) elements
+ return $ vcat (header' : innerContents)
+
+-- | Convert Pandoc block element to ConTeXt.
+blockToConTeXt :: Block
+ -> State WriterState Doc
+blockToConTeXt Null = return empty
+blockToConTeXt (Plain lst) = inlineListToConTeXt lst
+-- title beginning with fig: indicates that the image is a figure
+blockToConTeXt (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
+ capt <- inlineListToConTeXt txt
+ img <- inlineToConTeXt (Image attr txt (src, ""))
+ let (ident, _, _) = attr
+ label = if null ident
+ then empty
+ else "[]" <> brackets (text $ toLabel ident)
+ return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline
+blockToConTeXt (Para lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ contents <> blankline
+blockToConTeXt (LineBlock lns) = do
+ doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns
+ return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline
+blockToConTeXt (BlockQuote lst) = do
+ contents <- blockListToConTeXt lst
+ return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
+blockToConTeXt (CodeBlock _ str) =
+ return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline
+ -- blankline because \stoptyping can't have anything after it, inc. '}'
+blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
+blockToConTeXt (RawBlock _ _ ) = return empty
+blockToConTeXt (Div (ident,_,kvs) bs) = do
+ let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
+ let wrapRef txt = if null ident
+ then txt
+ else ("\\reference" <> brackets (text $ toLabel ident) <>
+ braces empty <> "%") $$ txt
+ wrapDir = case lookup "dir" kvs of
+ Just "rtl" -> align "righttoleft"
+ Just "ltr" -> align "lefttoright"
+ _ -> id
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> "\\start\\language["
+ <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
+ Nothing -> txt
+ wrapBlank txt = blankline <> txt <> blankline
+ fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
+blockToConTeXt (BulletList lst) = do
+ contents <- mapM listItemToConTeXt lst
+ return $ ("\\startitemize" <> if isTightList lst
+ then brackets "packed"
+ else empty) $$
+ vcat contents $$ text "\\stopitemize" <> blankline
+blockToConTeXt (OrderedList (start, style', delim) lst) = do
+ st <- get
+ let level = stOrderedListLevel st
+ put $ st {stOrderedListLevel = level + 1}
+ contents <- mapM listItemToConTeXt lst
+ put $ st {stOrderedListLevel = level}
+ let start' = if start == 1 then "" else "start=" ++ show start
+ let delim' = case delim of
+ DefaultDelim -> ""
+ Period -> "stopper=."
+ OneParen -> "stopper=)"
+ TwoParens -> "left=(,stopper=)"
+ let width = maximum $ map length $ take (length contents)
+ (orderedListMarkers (start, style', delim))
+ let width' = (toEnum width + 1) / 2
+ let width'' = if width' > (1.5 :: Double)
+ then "width=" ++ show width' ++ "em"
+ else ""
+ let specs2Items = filter (not . null) [start', delim', width'']
+ let specs2 = if null specs2Items
+ then ""
+ else "[" ++ intercalate "," specs2Items ++ "]"
+ let style'' = '[': (case style' of
+ DefaultStyle -> orderedListStyles !! level
+ Decimal -> 'n'
+ Example -> 'n'
+ LowerRoman -> 'r'
+ UpperRoman -> 'R'
+ LowerAlpha -> 'a'
+ UpperAlpha -> 'A') :
+ if isTightList lst then ",packed]" else "]"
+ let specs = style'' ++ specs2
+ return $ "\\startitemize" <> text specs $$ vcat contents $$
+ "\\stopitemize" <> blankline
+blockToConTeXt (DefinitionList lst) =
+ liftM vcat $ mapM defListItemToConTeXt lst
+blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
+-- If this is ever executed, provide a default for the reference identifier.
+blockToConTeXt (Header level attr lst) = sectionHeader attr level lst
+blockToConTeXt (Table caption aligns widths heads rows) = do
+ let colDescriptor colWidth alignment = (case alignment of
+ AlignLeft -> 'l'
+ AlignRight -> 'r'
+ AlignCenter -> 'c'
+ AlignDefault -> 'l'):
+ if colWidth == 0
+ then "|"
+ else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|")
+ let colDescriptors = "|" ++ (concat $
+ zipWith colDescriptor widths aligns)
+ headers <- if all null heads
+ then return empty
+ else liftM ($$ "\\HL") $ tableRowToConTeXt heads
+ captionText <- inlineListToConTeXt caption
+ rows' <- mapM tableRowToConTeXt rows
+ return $ "\\placetable" <> (if null caption
+ then brackets "none"
+ else empty)
+ <> braces captionText $$
+ "\\starttable" <> brackets (text colDescriptors) $$
+ "\\HL" $$ headers $$
+ vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline
+
+tableRowToConTeXt :: [[Block]] -> State WriterState Doc
+tableRowToConTeXt cols = do
+ cols' <- mapM blockListToConTeXt cols
+ return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR"
+
+listItemToConTeXt :: [Block] -> State WriterState Doc
+listItemToConTeXt list = blockListToConTeXt list >>=
+ return . ("\\item" $$) . (nest 2)
+
+defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
+defListItemToConTeXt (term, defs) = do
+ term' <- inlineListToConTeXt term
+ def' <- liftM vsep $ mapM blockListToConTeXt defs
+ return $ "\\startdescription" <> braces term' $$ nest 2 def' $$
+ "\\stopdescription" <> blankline
+
+-- | Convert list of block elements to ConTeXt.
+blockListToConTeXt :: [Block] -> State WriterState Doc
+blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst
+
+-- | Convert list of inline elements to ConTeXt.
+inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst
+ -- We add a \strut after a line break that precedes a space,
+ -- or the space gets swallowed
+ where addStruts (LineBreak : s : xs) | isSpacey s =
+ LineBreak : RawInline (Format "context") "\\strut " : s :
+ addStruts xs
+ addStruts (x:xs) = x : addStruts xs
+ addStruts [] = []
+ isSpacey Space = True
+ isSpacey (Str ('\160':_)) = True
+ isSpacey _ = False
+
+-- | Convert inline element to ConTeXt
+inlineToConTeXt :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+inlineToConTeXt (Emph lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ braces $ "\\em " <> contents
+inlineToConTeXt (Strong lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ braces $ "\\bf " <> contents
+inlineToConTeXt (Strikeout lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\overstrikes" <> braces contents
+inlineToConTeXt (Superscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\high" <> braces contents
+inlineToConTeXt (Subscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\low" <> braces contents
+inlineToConTeXt (SmallCaps lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ braces $ "\\sc " <> contents
+inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) =
+ return $ "\\type" <> braces (text str)
+inlineToConTeXt (Code _ str) = do
+ opts <- gets stOptions
+ return $ "\\mono" <> braces (text $ stringToConTeXt opts str)
+inlineToConTeXt (Quoted SingleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quote" <> braces contents
+inlineToConTeXt (Quoted DoubleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quotation" <> braces contents
+inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
+inlineToConTeXt (Str str) = do
+ opts <- gets stOptions
+ return $ text $ stringToConTeXt opts str
+inlineToConTeXt (Math InlineMath str) =
+ return $ char '$' <> text str <> char '$'
+inlineToConTeXt (Math DisplayMath str) =
+ return $ text "\\startformula " <> text str <> text " \\stopformula" <> space
+inlineToConTeXt (RawInline "context" str) = return $ text str
+inlineToConTeXt (RawInline "tex" str) = return $ text str
+inlineToConTeXt (RawInline _ _) = return empty
+inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
+inlineToConTeXt SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ return $ case wrapText of
+ WrapAuto -> space
+ WrapNone -> space
+ WrapPreserve -> cr
+inlineToConTeXt Space = return space
+-- Handle HTML-like internal document references to sections
+inlineToConTeXt (Link _ txt (('#' : ref), _)) = do
+ opts <- gets stOptions
+ contents <- inlineListToConTeXt txt
+ let ref' = toLabel $ stringToConTeXt opts ref
+ return $ text "\\goto"
+ <> braces contents
+ <> brackets (text ref')
+
+inlineToConTeXt (Link _ txt (src, _)) = do
+ let isAutolink = txt == [Str (unEscapeString src)]
+ st <- get
+ let next = stNextRef st
+ put $ st {stNextRef = next + 1}
+ let ref = "url" ++ show next
+ contents <- inlineListToConTeXt txt
+ return $ "\\useURL"
+ <> brackets (text ref)
+ <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
+ <> (if isAutolink
+ then empty
+ else brackets empty <> brackets contents)
+ <> "\\from"
+ <> brackets (text ref)
+inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do
+ opts <- gets stOptions
+ let showDim dir = let d = text (show dir) <> "="
+ in case (dimension dir attr) of
+ Just (Pixel a) ->
+ [d <> text (showInInch opts (Pixel a)) <> "in"]
+ Just (Percent a) ->
+ [d <> text (showFl (a / 100)) <> "\\textwidth"]
+ Just dim ->
+ [d <> text (show dim)]
+ Nothing ->
+ []
+ dimList = showDim Width ++ showDim Height
+ dims = if null dimList
+ then empty
+ else brackets $ cat (intersperse "," dimList)
+ clas = if null cls
+ then empty
+ else brackets $ text $ toLabel $ head cls
+ src' = if isURI src
+ then src
+ else unEscapeString src
+ return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas
+inlineToConTeXt (Note contents) = do
+ contents' <- blockListToConTeXt contents
+ let codeBlock x@(CodeBlock _ _) = [x]
+ codeBlock _ = []
+ let codeBlocks = query codeBlock contents
+ return $ if null codeBlocks
+ then text "\\footnote{" <> nest 2 contents' <> char '}'
+ else text "\\startbuffer " <> nest 2 contents' <>
+ text "\\stopbuffer\\footnote{\\getbuffer}"
+inlineToConTeXt (Span (_,_,kvs) ils) = do
+ let wrapDir txt = case lookup "dir" kvs of
+ Just "rtl" -> braces $ "\\righttoleft " <> txt
+ Just "ltr" -> braces $ "\\lefttoright " <> txt
+ _ -> txt
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
+ <> "]" <> txt <> "\\stop "
+ Nothing -> txt
+ fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
+
+-- | Craft the section header, inserting the section reference, if supplied.
+sectionHeader :: Attr
+ -> Int
+ -> [Inline]
+ -> State WriterState Doc
+sectionHeader (ident,classes,_) hdrLevel lst = do
+ contents <- inlineListToConTeXt lst
+ st <- get
+ let opts = stOptions st
+ let level' = case writerTopLevelDivision opts of
+ TopLevelPart -> hdrLevel - 2
+ TopLevelChapter -> hdrLevel - 1
+ TopLevelSection -> hdrLevel
+ TopLevelDefault -> hdrLevel
+ let ident' = toLabel ident
+ let (section, chapter) = if "unnumbered" `elem` classes
+ then (text "subject", text "title")
+ else (text "section", text "chapter")
+ return $ case level' of
+ -1 -> text "\\part" <> braces contents
+ 0 -> char '\\' <> chapter <> braces contents
+ n | n >= 1 && n <= 5 -> char '\\'
+ <> text (concat (replicate (n - 1) "sub"))
+ <> section
+ <> (if (not . null) ident'
+ then brackets (text ident')
+ else empty)
+ <> braces contents
+ <> blankline
+ _ -> contents <> blankline
+
+fromBcp47' :: String -> String
+fromBcp47' = fromBcp47 . splitBy (=='-')
+
+-- Takes a list of the constituents of a BCP 47 language code
+-- and irons out ConTeXt's exceptions
+-- https://tools.ietf.org/html/bcp47#section-2.1
+-- http://wiki.contextgarden.net/Language_Codes
+fromBcp47 :: [String] -> String
+fromBcp47 [] = ""
+fromBcp47 ("ar":"SY":_) = "ar-sy"
+fromBcp47 ("ar":"IQ":_) = "ar-iq"
+fromBcp47 ("ar":"JO":_) = "ar-jo"
+fromBcp47 ("ar":"LB":_) = "ar-lb"
+fromBcp47 ("ar":"DZ":_) = "ar-dz"
+fromBcp47 ("ar":"MA":_) = "ar-ma"
+fromBcp47 ("de":"1901":_) = "deo"
+fromBcp47 ("de":"DE":_) = "de-de"
+fromBcp47 ("de":"AT":_) = "de-at"
+fromBcp47 ("de":"CH":_) = "de-ch"
+fromBcp47 ("el":"poly":_) = "agr"
+fromBcp47 ("en":"US":_) = "en-us"
+fromBcp47 ("en":"GB":_) = "en-gb"
+fromBcp47 ("grc":_) = "agr"
+fromBcp47 x = fromIso $ head x
+ where
+ fromIso "el" = "gr"
+ fromIso "eu" = "ba"
+ fromIso "he" = "il"
+ fromIso "jp" = "ja"
+ fromIso "uk" = "ua"
+ fromIso "vi" = "vn"
+ fromIso "zh" = "cn"
+ fromIso l = l
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
new file mode 100644
index 000000000..cf641dcd6
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -0,0 +1,322 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE FlexibleInstances, OverloadedStrings,
+ ScopedTypeVariables, DeriveDataTypeable, CPP #-}
+#if MIN_VERSION_base(4,8,0)
+#else
+{-# LANGUAGE OverlappingInstances #-}
+#endif
+{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Custom
+ Copyright : Copyright (C) 2012-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to custom markup using
+a lua writer.
+-}
+module Text.Pandoc.Writers.Custom ( writeCustom ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Data.List ( intersperse )
+import Data.Char ( toLower )
+import Data.Typeable
+import Scripting.Lua (LuaState, StackValue, callfunc)
+import Text.Pandoc.Writers.Shared
+import qualified Scripting.Lua as Lua
+import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad (when)
+import Control.Exception
+import qualified Data.Map as M
+import Text.Pandoc.Templates
+import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8)
+
+attrToMap :: Attr -> M.Map String String
+attrToMap (id',classes,keyvals) = M.fromList
+ $ ("id", id')
+ : ("class", unwords classes)
+ : keyvals
+
+#if MIN_VERSION_hslua(0,4,0)
+#if MIN_VERSION_base(4,8,0)
+instance {-# OVERLAPS #-} StackValue [Char] where
+#else
+instance StackValue [Char] where
+#endif
+ push lua cs = Lua.push lua (UTF8.fromString cs)
+ peek lua i = do
+ res <- Lua.peek lua i
+ return $ UTF8.toString `fmap` res
+ valuetype _ = Lua.TSTRING
+#else
+#if MIN_VERSION_base(4,8,0)
+instance {-# OVERLAPS #-} StackValue a => StackValue [a] where
+#else
+instance StackValue a => StackValue [a] where
+#endif
+ push lua xs = do
+ Lua.createtable lua (length xs + 1) 0
+ let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i
+ mapM_ addValue $ zip [1..] xs
+ peek lua i = do
+ top <- Lua.gettop lua
+ let i' = if i < 0 then top + i + 1 else i
+ Lua.pushnil lua
+ lst <- getList lua i'
+ Lua.pop lua 1
+ return (Just lst)
+ valuetype _ = Lua.TTABLE
+
+getList :: StackValue a => LuaState -> Int -> IO [a]
+getList lua i' = do
+ continue <- Lua.next lua i'
+ if continue
+ then do
+ next <- Lua.peek lua (-1)
+ Lua.pop lua 1
+ x <- maybe (fail "peek returned Nothing") return next
+ rest <- getList lua i'
+ return (x : rest)
+ else return []
+#endif
+
+instance StackValue Format where
+ push lua (Format f) = Lua.push lua (map toLower f)
+ peek l n = fmap Format `fmap` Lua.peek l n
+ valuetype _ = Lua.TSTRING
+
+instance (StackValue a, StackValue b) => StackValue (M.Map a b) where
+ push lua m = do
+ let xs = M.toList m
+ Lua.createtable lua (length xs + 1) 0
+ let addValue (k, v) = Lua.push lua k >> Lua.push lua v >>
+ Lua.rawset lua (-3)
+ mapM_ addValue xs
+ peek _ _ = undefined -- not needed for our purposes
+ valuetype _ = Lua.TTABLE
+
+instance (StackValue a, StackValue b) => StackValue (a,b) where
+ push lua (k,v) = do
+ Lua.createtable lua 2 0
+ Lua.push lua k
+ Lua.push lua v
+ Lua.rawset lua (-3)
+ peek _ _ = undefined -- not needed for our purposes
+ valuetype _ = Lua.TTABLE
+
+#if MIN_VERSION_base(4,8,0)
+instance {-# OVERLAPS #-} StackValue [Inline] where
+#else
+instance StackValue [Inline] where
+#endif
+ push l ils = Lua.push l =<< inlineListToCustom l ils
+ peek _ _ = undefined
+ valuetype _ = Lua.TSTRING
+
+#if MIN_VERSION_base(4,8,0)
+instance {-# OVERLAPS #-} StackValue [Block] where
+#else
+instance StackValue [Block] where
+#endif
+ push l ils = Lua.push l =<< blockListToCustom l ils
+ peek _ _ = undefined
+ valuetype _ = Lua.TSTRING
+
+instance StackValue MetaValue where
+ push l (MetaMap m) = Lua.push l m
+ push l (MetaList xs) = Lua.push l xs
+ push l (MetaBool x) = Lua.push l x
+ push l (MetaString s) = Lua.push l s
+ push l (MetaInlines ils) = Lua.push l ils
+ push l (MetaBlocks bs) = Lua.push l bs
+ peek _ _ = undefined
+ valuetype (MetaMap _) = Lua.TTABLE
+ valuetype (MetaList _) = Lua.TTABLE
+ valuetype (MetaBool _) = Lua.TBOOLEAN
+ valuetype (MetaString _) = Lua.TSTRING
+ valuetype (MetaInlines _) = Lua.TSTRING
+ valuetype (MetaBlocks _) = Lua.TSTRING
+
+instance StackValue Citation where
+ push lua cit = do
+ Lua.createtable lua 6 0
+ let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >>
+ Lua.rawset lua (-3)
+ addValue ("citationId", citationId cit)
+ addValue ("citationPrefix", citationPrefix cit)
+ addValue ("citationSuffix", citationSuffix cit)
+ addValue ("citationMode", show (citationMode cit))
+ addValue ("citationNoteNum", citationNoteNum cit)
+ addValue ("citationHash", citationHash cit)
+ peek = undefined
+ valuetype _ = Lua.TTABLE
+
+data PandocLuaException = PandocLuaException String
+ deriving (Show, Typeable)
+
+instance Exception PandocLuaException
+
+-- | Convert Pandoc to custom markup.
+writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
+writeCustom luaFile opts doc@(Pandoc meta _) = do
+ luaScript <- UTF8.readFile luaFile
+ enc <- getForeignEncoding
+ setForeignEncoding utf8
+ lua <- Lua.newstate
+ Lua.openlibs lua
+ status <- Lua.loadstring lua luaScript luaFile
+ -- check for error in lua script (later we'll change the return type
+ -- to handle this more gracefully):
+ when (status /= 0) $
+#if MIN_VERSION_hslua(0,4,0)
+ Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString
+#else
+ Lua.tostring lua 1 >>= throw . PandocLuaException
+#endif
+ Lua.call lua 0 0
+ -- TODO - call hierarchicalize, so we have that info
+ rendered <- docToCustom lua opts doc
+ context <- metaToJSON opts
+ (blockListToCustom lua)
+ (inlineListToCustom lua)
+ meta
+ Lua.close lua
+ setForeignEncoding enc
+ let body = rendered
+ case writerTemplate opts of
+ Nothing -> return body
+ Just tpl -> return $ renderTemplate' tpl $ setField "body" body context
+
+docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String
+docToCustom lua opts (Pandoc (Meta metamap) blocks) = do
+ body <- blockListToCustom lua blocks
+ callfunc lua "Doc" body metamap (writerVariables opts)
+
+-- | Convert Pandoc block element to Custom.
+blockToCustom :: LuaState -- ^ Lua state
+ -> Block -- ^ Block element
+ -> IO String
+
+blockToCustom _ Null = return ""
+
+blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines
+
+blockToCustom lua (Para [Image attr txt (src,tit)]) =
+ callfunc lua "CaptionedImage" src tit txt (attrToMap attr)
+
+blockToCustom lua (Para inlines) = callfunc lua "Para" inlines
+
+blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList
+
+blockToCustom lua (RawBlock format str) =
+ callfunc lua "RawBlock" format str
+
+blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule"
+
+blockToCustom lua (Header level attr inlines) =
+ callfunc lua "Header" level inlines (attrToMap attr)
+
+blockToCustom lua (CodeBlock attr str) =
+ callfunc lua "CodeBlock" str (attrToMap attr)
+
+blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks
+
+blockToCustom lua (Table capt aligns widths headers rows') =
+ callfunc lua "Table" capt (map show aligns) widths headers rows'
+
+blockToCustom lua (BulletList items) = callfunc lua "BulletList" items
+
+blockToCustom lua (OrderedList (num,sty,delim) items) =
+ callfunc lua "OrderedList" items num (show sty) (show delim)
+
+blockToCustom lua (DefinitionList items) =
+ callfunc lua "DefinitionList" items
+
+blockToCustom lua (Div attr items) =
+ callfunc lua "Div" items (attrToMap attr)
+
+-- | Convert list of Pandoc block elements to Custom.
+blockListToCustom :: LuaState -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> IO String
+blockListToCustom lua xs = do
+ blocksep <- callfunc lua "Blocksep"
+ bs <- mapM (blockToCustom lua) xs
+ return $ mconcat $ intersperse blocksep bs
+
+-- | Convert list of Pandoc inline elements to Custom.
+inlineListToCustom :: LuaState -> [Inline] -> IO String
+inlineListToCustom lua lst = do
+ xs <- mapM (inlineToCustom lua) lst
+ return $ concat xs
+
+-- | Convert Pandoc inline element to Custom.
+inlineToCustom :: LuaState -> Inline -> IO String
+
+inlineToCustom lua (Str str) = callfunc lua "Str" str
+
+inlineToCustom lua Space = callfunc lua "Space"
+
+inlineToCustom lua SoftBreak = callfunc lua "SoftBreak"
+
+inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst
+
+inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst
+
+inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst
+
+inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst
+
+inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst
+
+inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst
+
+inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst
+
+inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst
+
+inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs
+
+inlineToCustom lua (Code attr str) =
+ callfunc lua "Code" str (attrToMap attr)
+
+inlineToCustom lua (Math DisplayMath str) =
+ callfunc lua "DisplayMath" str
+
+inlineToCustom lua (Math InlineMath str) =
+ callfunc lua "InlineMath" str
+
+inlineToCustom lua (RawInline format str) =
+ callfunc lua "RawInline" format str
+
+inlineToCustom lua (LineBreak) = callfunc lua "LineBreak"
+
+inlineToCustom lua (Link attr txt (src,tit)) =
+ callfunc lua "Link" txt src tit (attrToMap attr)
+
+inlineToCustom lua (Image attr alt (src,tit)) =
+ callfunc lua "Image" alt src tit (attrToMap attr)
+
+inlineToCustom lua (Note contents) = callfunc lua "Note" contents
+
+inlineToCustom lua (Span attr items) =
+ callfunc lua "Span" items (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
new file mode 100644
index 000000000..597851f65
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -0,0 +1,440 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf )
+import Data.Char ( toLower )
+import Data.Monoid ( Any(..) )
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import qualified Text.Pandoc.Builder as B
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+import Data.Generics (everywhere, mkT)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Control.Monad.Reader
+
+data DocBookVersion = DocBook4 | DocBook5
+ deriving (Eq, Show)
+
+type DB = ReaderT DocBookVersion
+
+-- | Convert list of authors to a docbook <author> section
+authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
+authorToDocbook opts name' = do
+ name <- render Nothing <$> inlinesToDocbook opts name'
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ return $ B.rawInline "docbook" $ render colwidth $
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = triml rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (intercalate " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeDocbook4 opts d =
+ runReaderT (writeDocbook opts d) DocBook4
+
+writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeDocbook5 opts d =
+ runReaderT (writeDocbook opts d) DocBook5
+
+-- | Convert Pandoc document to string in Docbook format.
+writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String
+writeDocbook opts (Pandoc meta blocks) = do
+ let elements = hierarchicalize blocks
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' = render colwidth
+ let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr)
+ (writerTemplate opts) &&
+ TopLevelDefault == writerTopLevelDivision opts)
+ then opts{ writerTopLevelDivision = TopLevelChapter }
+ else opts
+ -- The numbering here follows LaTeX's internal numbering
+ let startLvl = case writerTopLevelDivision opts' of
+ TopLevelPart -> -1
+ TopLevelChapter -> 0
+ TopLevelSection -> 1
+ TopLevelDefault -> 1
+ auths' <- mapM (authorToDocbook opts) $ docAuthors meta
+ let meta' = B.setMeta "author" auths' meta
+ metadata <- metaToJSON opts
+ (fmap (render colwidth . vcat) .
+ (mapM (elementToDocbook opts' startLvl) .
+ hierarchicalize))
+ (fmap (render colwidth) . inlinesToDocbook opts')
+ meta'
+ main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements)
+ let context = defField "body" main
+ $ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False)
+ $ metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Convert an Element to Docbook.
+elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
+elementToDocbook opts _ (Blk block) = blockToDocbook opts block
+elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do
+ version <- ask
+ -- Docbook doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ tag = case lvl of
+ -1 -> "part"
+ 0 -> "chapter"
+ n | n >= 1 && n <= 5 -> if version == DocBook5
+ then "section"
+ else "sect" ++ show n
+ _ -> "simplesect"
+ idName = if version == DocBook5
+ then "xml:id"
+ else "id"
+ idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')]
+ nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+ else []
+ attribs = nsAttr ++ idAttr
+ contents <- mapM (elementToDocbook opts (lvl + 1)) elements'
+ title' <- inlinesToDocbook opts title
+ return $ inTags True tag attribs $
+ inTagsSimple "title" title' $$ vcat contents
+
+-- | Convert a list of Pandoc blocks to Docbook.
+blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- Docbook varlistentrys.
+deflistItemsToDocbook :: PandocMonad m
+ => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc
+deflistItemsToDocbook opts items =
+ vcat <$> mapM (\(term, defs) -> deflistItemToDocbook opts term defs) items
+
+-- | Convert a term and a list of blocks into a Docbook varlistentry.
+deflistItemToDocbook :: PandocMonad m
+ => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc
+deflistItemToDocbook opts term defs = do
+ term' <- inlinesToDocbook opts term
+ def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs
+ return $ inTagsIndented "varlistentry" $
+ inTagsIndented "term" term' $$
+ inTagsIndented "listitem" def'
+
+-- | Convert a list of lists of blocks to a list of Docbook list items.
+listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc
+listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items
+
+-- | Convert a list of blocks into a Docbook list item.
+listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+listItemToDocbook opts item =
+ inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item)
+
+imageToDocbook :: WriterOptions -> Attr -> String -> Doc
+imageToDocbook _ attr src = selfClosingTag "imagedata" $
+ ("fileref", src) : idAndRole attr ++ dims
+ where
+ dims = go Width "width" ++ go Height "depth"
+ go dir dstr = case (dimension dir attr) of
+ Just a -> [(dstr, show a)]
+ Nothing -> []
+
+-- | Convert a Pandoc block element to Docbook.
+blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc
+blockToDocbook _ Null = return empty
+-- Add ids to paragraphs in divs with ids - this is needed for
+-- pandoc-citeproc to get link anchors in bibliographies:
+blockToDocbook opts (Div (ident,_,_) [Para lst]) =
+ let attribs = [("id", ident) | not (null ident)] in
+ if hasLineBreaks lst
+ then (flush . nowrap . inTags False "literallayout" attribs)
+ <$> inlinesToDocbook opts lst
+ else inTags True "para" attribs <$> inlinesToDocbook opts lst
+blockToDocbook opts (Div (ident,_,_) bs) = do
+ contents <- blocksToDocbook opts (map plainToPara bs)
+ return $
+ (if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) $$ contents
+blockToDocbook _ (Header _ _ _) =
+ return empty -- should not occur after hierarchicalize
+blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
+-- title beginning with fig: indicates that the image is a figure
+blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
+ alt <- inlinesToDocbook opts txt
+ let capt = if null txt
+ then empty
+ else inTagsSimple "title" alt
+ return $ inTagsIndented "figure" $
+ capt $$
+ (inTagsIndented "mediaobject" $
+ (inTagsIndented "imageobject"
+ (imageToDocbook opts attr src)) $$
+ inTagsSimple "textobject" (inTagsSimple "phrase" alt))
+blockToDocbook opts (Para lst)
+ | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout")
+ <$> inlinesToDocbook opts lst
+ | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst
+blockToDocbook opts (LineBlock lns) =
+ blockToDocbook opts $ linesToPara lns
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
+blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
+ text ("<programlisting" ++ lang ++ ">") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</programlisting>")
+ where lang = if null langs
+ then ""
+ else " language=\"" ++ escapeStringForXML (head langs) ++
+ "\""
+ isLang l = map toLower l `elem` map (map toLower) languages
+ langsFrom s = if isLang s
+ then [s]
+ else languagesByExtension . map toLower $ s
+ langs = concatMap langsFrom classes
+blockToDocbook opts (BulletList lst) = do
+ let attribs = [("spacing", "compact") | isTightList lst]
+ inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst
+blockToDocbook _ (OrderedList _ []) = return empty
+blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do
+ let numeration = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("numeration", "arabic")]
+ Example -> [("numeration", "arabic")]
+ UpperAlpha -> [("numeration", "upperalpha")]
+ LowerAlpha -> [("numeration", "loweralpha")]
+ UpperRoman -> [("numeration", "upperroman")]
+ LowerRoman -> [("numeration", "lowerroman")]
+ spacing = [("spacing", "compact") | isTightList (first:rest)]
+ attribs = numeration ++ spacing
+ items <- if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else do
+ first' <- blocksToDocbook opts (map plainToPara first)
+ rest' <- listItemsToDocbook opts rest
+ return $
+ (inTags True "listitem" [("override",show start)] first') $$
+ rest'
+ return $ inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) = do
+ let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst]
+ inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst
+blockToDocbook _ b@(RawBlock f str)
+ | f == "docbook" = return $ text str -- raw XML block
+ | f == "html" = do
+ version <- ask
+ if version == DocBook5
+ then return empty -- No html in Docbook5
+ else return $ text str -- allow html for backwards compatibility
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToDocbook _ HorizontalRule = return empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) = do
+ captionDoc <- if null caption
+ then return empty
+ else inTagsIndented "title" <$>
+ inlinesToDocbook opts caption
+ let tableType = if isEmpty captionDoc then "informaltable" else "table"
+ percent w = show (truncate (100*w) :: Integer) ++ "*"
+ coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec"
+ ([("colwidth", percent w) | w > 0] ++
+ [("align", alignmentToString al)])) widths aligns
+ head' <- if all null headers
+ then return empty
+ else inTagsIndented "thead" <$> tableRowToDocbook opts headers
+ body' <- (inTagsIndented "tbody" . vcat) <$>
+ mapM (tableRowToDocbook opts) rows
+ return $ inTagsIndented tableType $ captionDoc $$
+ (inTags True "tgroup" [("cols", show (length headers))] $
+ coltags $$ head' $$ body')
+
+hasLineBreaks :: [Inline] -> Bool
+hasLineBreaks = getAny . query isLineBreak . walk removeNote
+ where
+ removeNote :: Inline -> Inline
+ removeNote (Note _) = Str ""
+ removeNote x = x
+ isLineBreak :: Inline -> Any
+ isLineBreak LineBreak = Any True
+ isLineBreak _ = Any False
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook :: PandocMonad m
+ => WriterOptions
+ -> [[Block]]
+ -> DB m Doc
+tableRowToDocbook opts cols =
+ (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols
+
+tableItemToDocbook :: PandocMonad m
+ => WriterOptions
+ -> [Block]
+ -> DB m Doc
+tableItemToDocbook opts item =
+ (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item
+
+-- | Convert a list of inline elements to Docbook.
+inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc
+inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst
+
+-- | Convert an inline element to Docbook.
+inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc
+inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple "emphasis" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Strong lst) =
+ inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Strikeout lst) =
+ inTags False "emphasis" [("role", "strikethrough")] <$>
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Superscript lst) =
+ inTagsSimple "superscript" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Subscript lst) =
+ inTagsSimple "subscript" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (SmallCaps lst) =
+ inTags False "emphasis" [("role", "smallcaps")] <$>
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Quoted _ lst) =
+ inTagsSimple "quote" <$> inlinesToDocbook opts lst
+inlineToDocbook opts (Cite _ lst) =
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Span (ident,_,_) ils) =
+ ((if null ident
+ then mempty
+ else selfClosingTag "anchor" [("id", ident)]) <>) <$>
+ inlinesToDocbook opts ils
+inlineToDocbook _ (Code _ str) =
+ return $ inTagsSimple "literal" $ text (escapeStringForXML str)
+inlineToDocbook opts (Math t str)
+ | isMathML (writerHTMLMathMethod opts) = do
+ res <- convertMath writeMathML t str
+ case res of
+ Right r -> return $ inTagsSimple tagtype
+ $ text $ Xml.ppcElement conf
+ $ fixNS
+ $ removeAttr r
+ Left il -> inlineToDocbook opts il
+ | otherwise =
+ texMathToInlines t str >>= inlinesToDocbook opts
+ where tagtype = case t of
+ InlineMath -> "inlineequation"
+ DisplayMath -> "informalequation"
+ conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+ removeAttr e = e{ Xml.elAttribs = [] }
+ fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
+ fixNS = everywhere (mkT fixNS')
+inlineToDocbook _ il@(RawInline f x)
+ | f == "html" || f == "docbook" = return $ text x
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToDocbook _ LineBreak = return $ text "\n"
+-- currently ignore, would require the option to add custom
+-- styles to the document
+inlineToDocbook _ Space = return space
+-- because we use \n for LineBreak, we can't do soft breaks:
+inlineToDocbook _ SoftBreak = return space
+inlineToDocbook opts (Link attr txt (src, _))
+ | Just email <- stripPrefix "mailto:" src =
+ let emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ email
+ in case txt of
+ [Str s] | escapeURI s == email -> return emailLink
+ _ -> do contents <- inlinesToDocbook opts txt
+ return $ contents <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise = do
+ version <- ask
+ (if isPrefixOf "#" src
+ then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr
+ else if version == DocBook5
+ then inTags False "link" $ ("xlink:href", src) : idAndRole attr
+ else inTags False "ulink" $ ("url", src) : idAndRole attr )
+ <$> inlinesToDocbook opts txt
+inlineToDocbook opts (Image attr _ (src, tit)) = return $
+ let titleDoc = if null tit
+ then empty
+ else inTagsIndented "objectinfo" $
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ imageToDocbook opts attr src
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" <$> blocksToDocbook opts contents
+
+isMathML :: HTMLMathMethod -> Bool
+isMathML MathML = True
+isMathML _ = False
+
+idAndRole :: Attr -> [(String, String)]
+idAndRole (id',cls,_) = ident ++ role
+ where
+ ident = if null id'
+ then []
+ else [("id", id')]
+ role = if null cls
+ then []
+ else [("role", unwords cls)]
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
new file mode 100644
index 000000000..56aa29211
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -0,0 +1,1302 @@
+{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns, DeriveFunctor #-}
+{-
+Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docx
+ Copyright : Copyright (C) 2012-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to docx.
+-}
+module Text.Pandoc.Writers.Docx ( writeDocx ) where
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Char8 as BL8
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Text.Pandoc.UTF8 as UTF8
+import Codec.Archive.Zip
+import Data.Time.Clock.POSIX
+import Text.Pandoc.Compat.Time
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Writers.Shared (fixDisplayMath)
+import Text.Pandoc.Options
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Highlighting ( highlight )
+import Text.Pandoc.Walk
+import Text.Pandoc.Error (PandocError)
+import Text.XML.Light as XML
+import Text.TeXMath
+import Text.Pandoc.Readers.Docx.StyleMap
+import Control.Monad.Reader
+import Control.Monad.State
+import Skylighting
+import Control.Monad.Except (runExceptT)
+import System.Random (randomR)
+import Text.Printf (printf)
+import Data.Monoid ((<>))
+import qualified Data.Text as T
+import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
+ extensionFromMimeType)
+import Control.Applicative ((<|>))
+import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing)
+import Data.Char (ord, isSpace, toLower)
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Logging
+
+data ListMarker = NoMarker
+ | BulletMarker
+ | NumberMarker ListNumberStyle ListNumberDelim Int
+ deriving (Show, Read, Eq, Ord)
+
+listMarkerToId :: ListMarker -> String
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
+listMarkerToId (NumberMarker sty delim n) =
+ '9' : '9' : styNum : delimNum : show n
+ where styNum = case sty of
+ DefaultStyle -> '2'
+ Example -> '3'
+ Decimal -> '4'
+ LowerRoman -> '5'
+ UpperRoman -> '6'
+ LowerAlpha -> '7'
+ UpperAlpha -> '8'
+ delimNum = case delim of
+ DefaultDelim -> '0'
+ Period -> '1'
+ OneParen -> '2'
+ TwoParens -> '3'
+
+data WriterEnv = WriterEnv{ envTextProperties :: [Element]
+ , envParaProperties :: [Element]
+ , envRTL :: Bool
+ , envListLevel :: Int
+ , envListNumId :: Int
+ , envInDel :: Bool
+ , envChangesAuthor :: String
+ , envChangesDate :: String
+ , envPrintWidth :: Integer
+ }
+
+defaultWriterEnv :: WriterEnv
+defaultWriterEnv = WriterEnv{ envTextProperties = []
+ , envParaProperties = []
+ , envRTL = False
+ , envListLevel = -1
+ , envListNumId = 1
+ , envInDel = False
+ , envChangesAuthor = "unknown"
+ , envChangesDate = "1969-12-31T19:00:00Z"
+ , envPrintWidth = 1
+ }
+
+data WriterState = WriterState{
+ stFootnotes :: [Element]
+ , stSectionIds :: Set.Set String
+ , stExternalLinks :: M.Map String String
+ , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
+ , stLists :: [ListMarker]
+ , stInsId :: Int
+ , stDelId :: Int
+ , stStyleMaps :: StyleMaps
+ , stFirstPara :: Bool
+ , stTocTitle :: [Inline]
+ , stDynamicParaProps :: [String]
+ , stDynamicTextProps :: [String]
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+ stFootnotes = defaultFootnotes
+ , stSectionIds = Set.empty
+ , stExternalLinks = M.empty
+ , stImages = M.empty
+ , stLists = [NoMarker]
+ , stInsId = 1
+ , stDelId = 1
+ , stStyleMaps = defaultStyleMaps
+ , stFirstPara = False
+ , stTocTitle = [Str "Table of Contents"]
+ , stDynamicParaProps = []
+ , stDynamicTextProps = []
+ }
+
+type WS m = ReaderT WriterEnv (StateT WriterState m)
+
+mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode s attrs =
+ add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s)
+
+nodename :: String -> QName
+nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
+ where (name, prefix) = case break (==':') s of
+ (xs,[]) -> (xs, Nothing)
+ (ys, _:zs) -> (zs, Just ys)
+
+toLazy :: B.ByteString -> BL.ByteString
+toLazy = BL.fromChunks . (:[])
+
+renderXml :: Element -> BL.ByteString
+renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
+ UTF8.fromStringLazy (showElement elt)
+
+renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap _ [] = M.empty
+renumIdMap n (e:es)
+ | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
+ M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es)
+ | otherwise = renumIdMap n es
+
+replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr _ _ [] = []
+replaceAttr f val (a:as) | f (attrKey a) =
+ (XML.Attr (attrKey a) val) : (replaceAttr f val as)
+ | otherwise = a : (replaceAttr f val as)
+
+renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element
+renumId f renumMap e
+ | Just oldId <- findAttrBy f e
+ , Just newId <- M.lookup oldId renumMap =
+ let attrs' = replaceAttr f newId (elAttribs e)
+ in
+ e { elAttribs = attrs' }
+ | otherwise = e
+
+renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
+renumIds f renumMap = map (renumId f renumMap)
+
+-- | Certain characters are invalid in XML even if escaped.
+-- See #1992
+stripInvalidChars :: String -> String
+stripInvalidChars = filter isValidChar
+
+-- | See XML reference
+isValidChar :: Char -> Bool
+isValidChar (ord -> c)
+ | c == 0x9 = True
+ | c == 0xA = True
+ | c == 0xD = True
+ | 0x20 <= c && c <= 0xD7FF = True
+ | 0xE000 <= c && c <= 0xFFFD = True
+ | 0x10000 <= c && c <= 0x10FFFF = True
+ | otherwise = False
+
+metaValueToInlines :: MetaValue -> [Inline]
+metaValueToInlines (MetaString s) = [Str s]
+metaValueToInlines (MetaInlines ils) = ils
+metaValueToInlines (MetaBlocks bs) = query return bs
+metaValueToInlines (MetaBool b) = [Str $ show b]
+metaValueToInlines _ = []
+
+
+
+writeDocx :: (PandocMonad m)
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m BL.ByteString
+writeDocx opts doc@(Pandoc meta _) = do
+ let datadir = writerUserDataDir opts
+ let doc' = walk fixDisplayMath $ doc
+ username <- P.lookupEnv "USERNAME"
+ utctime <- P.getCurrentTime
+ distArchive <- (toArchive . BL.fromStrict) <$>
+ P.readDataFile datadir "reference.docx"
+ refArchive <- case writerReferenceDoc opts of
+ Just f -> toArchive <$> P.readFileLazy f
+ Nothing -> return distArchive
+
+ parsedDoc <- parseXml refArchive distArchive "word/document.xml"
+ let wname f qn = qPrefix qn == Just "w" && f (qName qn)
+ let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
+
+ -- Gets the template size
+ let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz")))
+ let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName))
+
+ let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar")))
+ let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName))
+ let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName))
+
+ -- Get the avaible area (converting the size and the margins to int and
+ -- doing the difference
+ let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer)
+ <*> (
+ (+) <$> (read <$> mbAttrMarRight ::Maybe Integer)
+ <*> (read <$> mbAttrMarLeft ::Maybe Integer)
+ )
+
+ -- styles
+ let stylepath = "word/styles.xml"
+ styledoc <- parseXml refArchive distArchive stylepath
+
+ -- parse styledoc for heading styles
+ let styleMaps = getStyleMaps styledoc
+
+ let tocTitle = fromMaybe (stTocTitle defaultWriterState) $
+ metaValueToInlines <$> lookupMeta "toc-title" meta
+
+ let initialSt = defaultWriterState {
+ stStyleMaps = styleMaps
+ , stTocTitle = tocTitle
+ }
+
+ let isRTLmeta = case lookupMeta "dir" meta of
+ Just (MetaString "rtl") -> True
+ Just (MetaInlines [Str "rtl"]) -> True
+ _ -> False
+
+ let env = defaultWriterEnv {
+ envRTL = isRTLmeta
+ , envChangesAuthor = fromMaybe "unknown" username
+ , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
+ , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
+ }
+
+
+ ((contents, footnotes), st) <- runStateT
+ (runReaderT
+ (writeOpenXML opts{writerWrapText = WrapNone} doc')
+ env)
+ initialSt
+ let epochtime = floor $ utcTimeToPOSIXSeconds utctime
+ let imgs = M.elems $ stImages st
+
+ -- create entries for images in word/media/...
+ let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
+ let imageEntries = map toImageEntry imgs
+
+ let stdAttributes =
+ [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+ ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+ ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+ ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+ ,("xmlns:v","urn:schemas-microsoft-com:vml")
+ ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+ ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+ ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+ ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+
+
+ parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels"
+ let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
+ let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
+ let headers = filterElements isHeaderNode parsedRels
+ let footers = filterElements isFooterNode parsedRels
+
+ let extractTarget = findAttr (QName "Target" Nothing Nothing)
+
+ -- we create [Content_Types].xml and word/_rels/document.xml.rels
+ -- from scratch rather than reading from reference.docx,
+ -- because Word sometimes changes these files when a reference.docx is modified,
+ -- e.g. deleting the reference to footnotes.xml or removing default entries
+ -- for image content types.
+
+ -- [Content_Types].xml
+ let mkOverrideNode (part', contentType') = mknode "Override"
+ [("PartName",part'),("ContentType",contentType')] ()
+ let mkImageOverride (_, imgpath, mbMimeType, _, _) =
+ mkOverrideNode ("/word/" ++ imgpath,
+ fromMaybe "application/octet-stream" mbMimeType)
+ let mkMediaOverride imgpath =
+ mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath)
+ let overrides = map mkOverrideNode (
+ [("/word/webSettings.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
+ ,("/word/numbering.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml")
+ ,("/word/settings.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml")
+ ,("/word/theme/theme1.xml",
+ "application/vnd.openxmlformats-officedocument.theme+xml")
+ ,("/word/fontTable.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml")
+ ,("/docProps/app.xml",
+ "application/vnd.openxmlformats-officedocument.extended-properties+xml")
+ ,("/docProps/core.xml",
+ "application/vnd.openxmlformats-package.core-properties+xml")
+ ,("/word/styles.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml")
+ ,("/word/document.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml")
+ ,("/word/footnotes.xml",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
+ ] ++
+ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
+ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
+ map mkImageOverride imgs ++
+ map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive
+ , "word/media/" `isPrefixOf` eRelativePath e ]
+
+ let defaultnodes = [mknode "Default"
+ [("Extension","xml"),("ContentType","application/xml")] (),
+ mknode "Default"
+ [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()]
+ let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides
+ let contentTypesEntry = toEntry "[Content_Types].xml" epochtime
+ $ renderXml contentTypesDoc
+
+ -- word/_rels/document.xml.rels
+ let toBaseRel (url', id', target') = mknode "Relationship"
+ [("Type",url')
+ ,("Id",id')
+ ,("Target",target')] ()
+ let baserels' = map toBaseRel
+ [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering",
+ "rId1",
+ "numbering.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles",
+ "rId2",
+ "styles.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings",
+ "rId3",
+ "settings.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings",
+ "rId4",
+ "webSettings.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
+ "rId5",
+ "fontTable.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme",
+ "rId6",
+ "theme/theme1.xml")
+ ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
+ "rId7",
+ "footnotes.xml")
+ ]
+
+ let idMap = renumIdMap (length baserels' + 1) (headers ++ footers)
+ let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
+ let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
+ let baserels = baserels' ++ renumHeaders ++ renumFooters
+ let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+ let imgrels = map toImgRel imgs
+ let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
+ let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
+ let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels
+ let relEntry = toEntry "word/_rels/document.xml.rels" epochtime
+ $ renderXml reldoc
+
+
+ -- adjust contents to add sectPr from reference.docx
+ let sectpr = case mbsectpr of
+ Just sectpr' -> let cs = renumIds
+ (\q -> qName q == "id" && qPrefix q == Just "r")
+ idMap
+ (elChildren sectpr')
+ in
+ add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs
+ Nothing -> (mknode "w:sectPr" [] ())
+
+ -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
+ let contents' = contents ++ [sectpr]
+ let docContents = mknode "w:document" stdAttributes
+ $ mknode "w:body" [] contents'
+
+
+
+ -- word/document.xml
+ let contentEntry = toEntry "word/document.xml" epochtime
+ $ renderXml docContents
+
+ -- footnotes
+ let notes = mknode "w:footnotes" stdAttributes footnotes
+ let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes
+
+ -- footnote rels
+ let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime
+ $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")]
+ linkrels
+
+ -- styles
+
+ -- We only want to inject paragraph and text properties that
+ -- are not already in the style map. Note that keys in the stylemap
+ -- are normalized as lowercase.
+ let newDynamicParaProps = filter
+ (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps)
+ (stDynamicParaProps st)
+
+ newDynamicTextProps = filter
+ (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps)
+ (stDynamicTextProps st)
+
+ let newstyles = map newParaPropToOpenXml newDynamicParaProps ++
+ map newTextPropToOpenXml newDynamicTextProps ++
+ (case writerHighlightStyle opts of
+ Nothing -> []
+ Just sty -> (styleToOpenXml styleMaps sty))
+ let styledoc' = styledoc{ elContent = elContent styledoc ++
+ map Elem newstyles }
+ let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
+
+ -- construct word/numbering.xml
+ let numpath = "word/numbering.xml"
+ numbering <- parseXml refArchive distArchive numpath
+ newNumElts <- mkNumbering (stLists st)
+ let allElts = onlyElems (elContent numbering) ++ newNumElts
+ let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent =
+ -- we want all the abstractNums first, then the nums,
+ -- otherwise things break:
+ [Elem e | e <- allElts
+ , qName (elName e) == "abstractNum" ] ++
+ [Elem e | e <- allElts
+ , qName (elName e) == "num" ] }
+ let docPropsPath = "docProps/core.xml"
+ let docProps = mknode "cp:coreProperties"
+ [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
+ ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:dcterms","http://purl.org/dc/terms/")
+ ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
+ ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
+ $ mknode "dc:title" [] (stringify $ docTitle meta)
+ : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta))
+ : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
+ , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+ ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+ let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
+
+ let relsPath = "_rels/.rels"
+ let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
+ $ map (\attrs -> mknode "Relationship" attrs ())
+ [ [("Id","rId1")
+ ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
+ ,("Target","word/document.xml")]
+ , [("Id","rId4")
+ ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties")
+ ,("Target","docProps/app.xml")]
+ , [("Id","rId3")
+ ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties")
+ ,("Target","docProps/core.xml")]
+ ]
+ let relsEntry = toEntry relsPath epochtime $ renderXml rels
+
+ -- we use dist archive for settings.xml, because Word sometimes
+ -- adds references to footnotes or endnotes we don't have...
+ -- we do, however, copy some settings over from reference
+ let settingsPath = "word/settings.xml"
+ settingsList = [ "w:autoHyphenation"
+ , "w:consecutiveHyphenLimit"
+ , "w:hyphenationZone"
+ , "w:doNotHyphenateCap"
+ ]
+ settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
+
+ let entryFromArchive arch path =
+ maybe (fail $ path ++ " missing in reference docx")
+ return
+ (findEntryByPath path arch `mplus` findEntryByPath path distArchive)
+ docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
+ themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
+ fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
+ webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
+ headerFooterEntries <- mapM (entryFromArchive refArchive) $
+ mapMaybe (fmap ("word/" ++) . extractTarget)
+ (headers ++ footers)
+ let miscRelEntries = [ e | e <- zEntries refArchive
+ , "word/_rels/" `isPrefixOf` (eRelativePath e)
+ , ".xml.rels" `isSuffixOf` (eRelativePath e)
+ , eRelativePath e /= "word/_rels/document.xml.rels"
+ , eRelativePath e /= "word/_rels/footnotes.xml.rels" ]
+ let otherMediaEntries = [ e | e <- zEntries refArchive
+ , "word/media/" `isPrefixOf` eRelativePath e ]
+
+ -- Create archive
+ let archive = foldr addEntryToArchive emptyArchive $
+ contentTypesEntry : relsEntry : contentEntry : relEntry :
+ footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
+ docPropsEntry : docPropsAppEntry : themeEntry :
+ fontTableEntry : settingsEntry : webSettingsEntry :
+ imageEntries ++ headerFooterEntries ++
+ miscRelEntries ++ otherMediaEntries
+ return $ fromArchive archive
+
+
+newParaPropToOpenXml :: String -> Element
+newParaPropToOpenXml s =
+ let styleId = filter (not . isSpace) s
+ in mknode "w:style" [ ("w:type", "paragraph")
+ , ("w:customStyle", "1")
+ , ("w:styleId", styleId)]
+ [ mknode "w:name" [("w:val", s)] ()
+ , mknode "w:basedOn" [("w:val","BodyText")] ()
+ , mknode "w:qFormat" [] ()
+ ]
+
+newTextPropToOpenXml :: String -> Element
+newTextPropToOpenXml s =
+ let styleId = filter (not . isSpace) s
+ in mknode "w:style" [ ("w:type", "character")
+ , ("w:customStyle", "1")
+ , ("w:styleId", styleId)]
+ [ mknode "w:name" [("w:val", s)] ()
+ , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
+ ]
+
+styleToOpenXml :: StyleMaps -> Style -> [Element]
+styleToOpenXml sm style =
+ maybeToList parStyle ++ mapMaybe toStyle alltoktypes
+ where alltoktypes = enumFromTo KeywordTok NormalTok
+ toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","character"),
+ ("w:customStyle","1"),("w:styleId",show toktype)]
+ [ mknode "w:name" [("w:val",show toktype)] ()
+ , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
+ , mknode "w:rPr" [] $
+ [ mknode "w:color" [("w:val",tokCol toktype)] ()
+ | tokCol toktype /= "auto" ] ++
+ [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+ | tokBg toktype /= "auto" ] ++
+ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
+ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
+ [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
+ ]
+ tokStyles = tokenStyles style
+ tokFeature f toktype = maybe False f $ lookup toktype tokStyles
+ tokCol toktype = maybe "auto" (drop 1 . fromColor)
+ $ (tokenColor =<< lookup toktype tokStyles)
+ `mplus` defaultColor style
+ tokBg toktype = maybe "auto" (drop 1 . fromColor)
+ $ (tokenBackground =<< lookup toktype tokStyles)
+ `mplus` backgroundColor style
+ parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","paragraph"),
+ ("w:customStyle","1"),("w:styleId","SourceCode")]
+ [ mknode "w:name" [("w:val","Source Code")] ()
+ , mknode "w:basedOn" [("w:val","Normal")] ()
+ , mknode "w:link" [("w:val","VerbatimChar")] ()
+ , mknode "w:pPr" []
+ $ mknode "w:wordWrap" [("w:val","off")] ()
+ : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
+ $ backgroundColor style )
+ ]
+
+copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
+copyChildren refArchive distArchive path timestamp elNames = do
+ ref <- parseXml refArchive distArchive path
+ dist <- parseXml distArchive distArchive path
+ return $ toEntry path timestamp $ renderXml dist{
+ elContent = elContent dist ++ copyContent ref
+ }
+ where
+ strName QName{qName=name, qPrefix=prefix}
+ | Just p <- prefix = p++":"++name
+ | otherwise = name
+ shouldCopy = (`elem` elNames) . strName
+ cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
+ copyContent = map cleanElem . filterChildrenName shouldCopy
+
+-- this is the lowest number used for a list numId
+baseListId :: Int
+baseListId = 1000
+
+mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element]
+mkNumbering lists = do
+ elts <- mapM mkAbstractNum (ordNub lists)
+ return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
+
+mkNum :: ListMarker -> Int -> Element
+mkNum marker numid =
+ mknode "w:num" [("w:numId",show numid)]
+ $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
+ : case marker of
+ NoMarker -> []
+ BulletMarker -> []
+ NumberMarker _ _ start ->
+ map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
+ $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
+
+mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element
+mkAbstractNum marker = do
+ gen <- P.newStdGen
+ let (nsid, _) = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen
+ return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
+ $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
+ : mknode "w:multiLevelType" [("w:val","multilevel")] ()
+ : map (mkLvl marker) [0..6]
+
+mkLvl :: ListMarker -> Int -> Element
+mkLvl marker lvl =
+ mknode "w:lvl" [("w:ilvl",show lvl)] $
+ [ mknode "w:start" [("w:val",start)] ()
+ | marker /= NoMarker && marker /= BulletMarker ] ++
+ [ mknode "w:numFmt" [("w:val",fmt)] ()
+ , mknode "w:lvlText" [("w:val",lvltxt)] ()
+ , mknode "w:lvlJc" [("w:val","left")] ()
+ , mknode "w:pPr" []
+ [ mknode "w:tabs" []
+ $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
+ , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
+ ]
+ ]
+ where (fmt, lvltxt, start) =
+ case marker of
+ NoMarker -> ("bullet"," ","1")
+ BulletMarker -> ("bullet",bulletFor lvl,"1")
+ NumberMarker st de n -> (styleFor st lvl
+ ,patternFor de ("%" ++ show (lvl + 1))
+ ,show n)
+ step = 720
+ hang = 480
+ bulletFor 0 = "\x2022" -- filled circle
+ bulletFor 1 = "\x2013" -- en dash
+ bulletFor 2 = "\x2022" -- hyphen bullet
+ bulletFor 3 = "\x2013"
+ bulletFor 4 = "\x2022"
+ bulletFor 5 = "\x2013"
+ bulletFor _ = "\x2022"
+ styleFor UpperAlpha _ = "upperLetter"
+ styleFor LowerAlpha _ = "lowerLetter"
+ styleFor UpperRoman _ = "upperRoman"
+ styleFor LowerRoman _ = "lowerRoman"
+ styleFor Decimal _ = "decimal"
+ styleFor DefaultStyle 1 = "decimal"
+ styleFor DefaultStyle 2 = "lowerLetter"
+ styleFor DefaultStyle 3 = "lowerRoman"
+ styleFor DefaultStyle 4 = "decimal"
+ styleFor DefaultStyle 5 = "lowerLetter"
+ styleFor DefaultStyle 6 = "lowerRoman"
+ styleFor _ _ = "decimal"
+ patternFor OneParen s = s ++ ")"
+ patternFor TwoParens s = "(" ++ s ++ ")"
+ patternFor _ s = s ++ "."
+
+getNumId :: (PandocMonad m) => WS m Int
+getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
+
+
+makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
+makeTOC opts | writerTableOfContents opts = do
+ let depth = "1-"++(show (writerTOCDepth opts))
+ let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+ tocTitle <- gets stTocTitle
+ title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
+ return $
+ [mknode "w:sdt" [] ([
+ mknode "w:sdtPr" [] (
+ mknode "w:docPartObj" [] (
+ [mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
+ mknode "w:docPartUnique" [] ()]
+ ) -- w:docPartObj
+ ), -- w:sdtPr
+ mknode "w:sdtContent" [] (title++[
+ mknode "w:p" [] (
+ mknode "w:r" [] ([
+ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
+ mknode "w:instrText" [("xml:space","preserve")] tocCmd,
+ mknode "w:fldChar" [("w:fldCharType","separate")] (),
+ mknode "w:fldChar" [("w:fldCharType","end")] ()
+ ]) -- w:r
+ ) -- w:p
+ ])
+ ])] -- w:sdt
+makeTOC _ = return []
+
+
+-- | Convert Pandoc document to two lists of
+-- OpenXML elements (the main document and footnotes).
+writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element])
+writeOpenXML opts (Pandoc meta blocks) = do
+ let tit = docTitle meta ++ case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> LineBreak : xs
+ _ -> []
+ let auths = docAuthors meta
+ let dat = docDate meta
+ let abstract' = case lookupMeta "abstract" meta of
+ Just (MetaBlocks bs) -> bs
+ Just (MetaInlines ils) -> [Plain ils]
+ _ -> []
+ let subtitle' = case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> xs
+ Just (MetaBlocks [Para xs]) -> xs
+ Just (MetaInlines xs) -> xs
+ _ -> []
+ title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+ subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
+ map Para auths
+ date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ abstract <- if null abstract'
+ then return []
+ else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
+ let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
+ convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
+ convertSpace xs = xs
+ let blocks' = bottomUp convertSpace blocks
+ doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
+ notes' <- reverse `fmap` gets stFootnotes
+ toc <- makeTOC opts
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
+ return (meta' ++ doc', notes')
+
+-- | Convert a list of Pandoc blocks to OpenXML.
+blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
+blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
+
+pCustomStyle :: String -> Element
+pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+
+pStyleM :: (PandocMonad m) => String -> WS m XML.Element
+pStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sParaStyleMap styleMaps
+ return $ mknode "w:pStyle" [("w:val",sty')] ()
+
+rCustomStyle :: String -> Element
+rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+
+rStyleM :: (PandocMonad m) => String -> WS m XML.Element
+rStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sCharStyleMap styleMaps
+ return $ mknode "w:rStyle" [("w:val",sty')] ()
+
+getUniqueId :: (PandocMonad m) => m String
+-- the + 20 is to ensure that there are no clashes with the rIds
+-- already in word/document.xml.rel
+getUniqueId = (show . (+ 20)) <$> P.newUniqueHash
+
+-- | Key for specifying user-defined docx styles.
+dynamicStyleKey :: String
+dynamicStyleKey = "custom-style"
+
+-- | Convert a Pandoc block element to OpenXML.
+blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
+
+blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML' _ Null = return []
+blockToOpenXML' opts (Div (ident,classes,kvs) bs)
+ | Just sty <- lookup dynamicStyleKey kvs = do
+ modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)}
+ withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs
+ | Just "rtl" <- lookup "dir" kvs = do
+ let kvs' = filter (("dir", "rtl")/=) kvs
+ local (\env -> env { envRTL = True }) $
+ blockToOpenXML opts (Div (ident,classes,kvs') bs)
+ | Just "ltr" <- lookup "dir" kvs = do
+ let kvs' = filter (("dir", "ltr")/=) kvs
+ local (\env -> env { envRTL = False }) $
+ blockToOpenXML opts (Div (ident,classes,kvs') bs)
+blockToOpenXML' opts (Div (_,["references"],_) bs) = do
+ let (hs, bs') = span isHeaderBlock bs
+ header <- blocksToOpenXML opts hs
+ -- We put the Bibliography style on paragraphs after the header
+ rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
+ return (header ++ rest)
+blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs
+blockToOpenXML' opts (Header lev (ident,_,_) lst) = do
+ setFirstPara
+ paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
+ getParaProps False
+ contents <- inlinesToOpenXML opts lst
+ usedIdents <- gets stSectionIds
+ let bookmarkName = if null ident
+ then uniqueIdent lst usedIdents
+ else ident
+ modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s }
+ id' <- (lift . lift) getUniqueId
+ let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
+ ,("w:name",bookmarkName)] ()
+ let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
+ return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
+blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact")
+ $ blockToOpenXML opts (Para lst)
+-- title beginning with fig: indicates that the image is a figure
+blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do
+ setFirstPara
+ let prop = pCustomStyle $
+ if null alt
+ then "Figure"
+ else "FigureWithCaption"
+ paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False)
+ contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
+ captionNode <- withParaProp (pCustomStyle "ImageCaption")
+ $ blockToOpenXML opts (Para alt)
+ return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
+-- fixDisplayMath sometimes produces a Para [] as artifact
+blockToOpenXML' _ (Para []) = return []
+blockToOpenXML' opts (Para lst) = do
+ isFirstPara <- gets stFirstPara
+ paraProps <- getParaProps $ case lst of
+ [Math DisplayMath _] -> True
+ _ -> False
+ bodyTextStyle <- pStyleM "Body Text"
+ let paraProps' = case paraProps of
+ [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
+ ps -> ps
+ modify $ \s -> s { stFirstPara = False }
+ contents <- inlinesToOpenXML opts lst
+ return [mknode "w:p" [] (paraProps' ++ contents)]
+blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
+blockToOpenXML' _ b@(RawBlock format str)
+ | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
+ | otherwise = do
+ report $ BlockNotRendered b
+ return []
+blockToOpenXML' opts (BlockQuote blocks) = do
+ p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
+ setFirstPara
+ return p
+blockToOpenXML' opts (CodeBlock attrs str) = do
+ p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
+ setFirstPara
+ return p
+blockToOpenXML' _ HorizontalRule = do
+ setFirstPara
+ return [
+ mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
+ $ mknode "v:rect" [("style","width:0;height:1.5pt"),
+ ("o:hralign","center"),
+ ("o:hrstd","t"),("o:hr","t")] () ]
+blockToOpenXML' opts (Table caption aligns widths headers rows) = do
+ setFirstPara
+ let captionStr = stringify caption
+ caption' <- if null caption
+ then return []
+ else withParaProp (pCustomStyle "TableCaption")
+ $ blockToOpenXML opts (Para caption)
+ let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
+ let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
+ $ blocksToOpenXML opts cell
+ headers' <- mapM cellToOpenXML $ zip aligns headers
+ rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
+ let borderProps = mknode "w:tcPr" []
+ [ mknode "w:tcBorders" []
+ $ mknode "w:bottom" [("w:val","single")] ()
+ , mknode "w:vAlign" [("w:val","bottom")] () ]
+ let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [pCustomStyle "Compact"]]]
+ let mkcell border contents = mknode "w:tc" []
+ $ [ borderProps | border ] ++
+ if null contents
+ then emptyCell
+ else contents
+ let mkrow border cells = mknode "w:tr" [] $
+ [mknode "w:trPr" [] [
+ mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
+ ++ map (mkcell border) cells
+ let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
+ let fullrow = 5000 -- 100% specified in pct
+ let rowwidth = fullrow * sum widths
+ let mkgridcol w = mknode "w:gridCol"
+ [("w:w", show (floor (textwidth * w) :: Integer))] ()
+ let hasHeader = not (all null headers)
+ return $
+ caption' ++
+ [mknode "w:tbl" []
+ ( mknode "w:tblPr" []
+ ( mknode "w:tblStyle" [("w:val","TableNormal")] () :
+ mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+ mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
+ [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+ | not (null caption) ] )
+ : mknode "w:tblGrid" []
+ (if all (==0) widths
+ then []
+ else map mkgridcol widths)
+ : [ mkrow True headers' | hasHeader ] ++
+ map (mkrow False) rows'
+ )]
+blockToOpenXML' opts (BulletList lst) = do
+ let marker = BulletMarker
+ addList marker
+ numid <- getNumId
+ l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ setFirstPara
+ return l
+blockToOpenXML' opts (OrderedList (start, numstyle, numdelim) lst) = do
+ let marker = NumberMarker numstyle numdelim start
+ addList marker
+ numid <- getNumId
+ l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ setFirstPara
+ return l
+blockToOpenXML' opts (DefinitionList items) = do
+ l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items
+ setFirstPara
+ return l
+
+definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
+definitionListItemToOpenXML opts (term,defs) = do
+ term' <- withParaProp (pCustomStyle "DefinitionTerm")
+ $ blockToOpenXML opts (Para term)
+ defs' <- withParaProp (pCustomStyle "Definition")
+ $ concat `fmap` mapM (blocksToOpenXML opts) defs
+ return $ term' ++ defs'
+
+addList :: (PandocMonad m) => ListMarker -> WS m ()
+addList marker = do
+ lists <- gets stLists
+ modify $ \st -> st{ stLists = lists ++ [marker] }
+
+listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
+listItemToOpenXML _ _ [] = return []
+listItemToOpenXML opts numid (first:rest) = do
+ first' <- withNumId numid $ blockToOpenXML opts first
+ -- baseListId is the code for no list marker:
+ rest' <- withNumId baseListId $ blocksToOpenXML opts rest
+ return $ first' ++ rest'
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+-- | Convert a list of inline elements to OpenXML.
+inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
+inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
+
+withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
+withNumId numid = local $ \env -> env{ envListNumId = numid }
+
+asList :: (PandocMonad m) => WS m a -> WS m a
+asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
+
+getTextProps :: (PandocMonad m) => WS m [Element]
+getTextProps = do
+ props <- asks envTextProperties
+ return $ if null props
+ then []
+ else [mknode "w:rPr" [] props]
+
+withTextProp :: PandocMonad m => Element -> WS m a -> WS m a
+withTextProp d p =
+ local (\env -> env {envTextProperties = d : envTextProperties env}) p
+
+withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
+withTextPropM = (. flip withTextProp) . (>>=)
+
+getParaProps :: PandocMonad m => Bool -> WS m [Element]
+getParaProps displayMathPara = do
+ props <- asks envParaProperties
+ listLevel <- asks envListLevel
+ numid <- asks envListNumId
+ let listPr = if listLevel >= 0 && not displayMathPara
+ then [ mknode "w:numPr" []
+ [ mknode "w:numId" [("w:val",show numid)] ()
+ , mknode "w:ilvl" [("w:val",show listLevel)] () ]
+ ]
+ else []
+ return $ case props ++ listPr of
+ [] -> []
+ ps -> [mknode "w:pPr" [] ps]
+
+withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
+withParaProp d p =
+ local (\env -> env {envParaProperties = d : envParaProperties env}) p
+
+withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
+withParaPropM = (. flip withParaProp) . (>>=)
+
+formattedString :: PandocMonad m => String -> WS m [Element]
+formattedString str = do
+ props <- getTextProps
+ inDel <- asks envInDel
+ return [ mknode "w:r" [] $
+ props ++
+ [ mknode (if inDel then "w:delText" else "w:t")
+ [("xml:space","preserve")] (stripInvalidChars str) ] ]
+
+setFirstPara :: PandocMonad m => WS m ()
+setFirstPara = modify $ \s -> s { stFirstPara = True }
+
+-- | Convert an inline element to OpenXML.
+inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
+
+inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML' _ (Str str) = formattedString str
+inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
+inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
+inlineToOpenXML' opts (Span (ident,classes,kvs) ils)
+ | Just sty <- lookup dynamicStyleKey kvs = do
+ let kvs' = filter ((dynamicStyleKey, sty)/=) kvs
+ modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)}
+ withTextProp (rCustomStyle sty) $
+ inlineToOpenXML opts (Span (ident,classes,kvs') ils)
+ | Just "rtl" <- lookup "dir" kvs = do
+ let kvs' = filter (("dir", "rtl")/=) kvs
+ local (\env -> env { envRTL = True }) $
+ inlineToOpenXML opts (Span (ident,classes,kvs') ils)
+ | Just "ltr" <- lookup "dir" kvs = do
+ let kvs' = filter (("dir", "ltr")/=) kvs
+ local (\env -> env { envRTL = False }) $
+ inlineToOpenXML opts (Span (ident,classes,kvs') ils)
+ | "insertion" `elem` classes = do
+ defaultAuthor <- asks envChangesAuthor
+ defaultDate <- asks envChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ insId <- gets stInsId
+ modify $ \s -> s{stInsId = (insId + 1)}
+ x <- inlinesToOpenXML opts ils
+ return [ mknode "w:ins" [("w:id", (show insId)),
+ ("w:author", author),
+ ("w:date", date)]
+ x ]
+ | "deletion" `elem` classes = do
+ defaultAuthor <- asks envChangesAuthor
+ defaultDate <- asks envChangesDate
+ let author = fromMaybe defaultAuthor (lookup "author" kvs)
+ date = fromMaybe defaultDate (lookup "date" kvs)
+ delId <- gets stDelId
+ modify $ \s -> s{stDelId = (delId + 1)}
+ x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils)
+ return [ mknode "w:del" [("w:id", (show delId)),
+ ("w:author", author),
+ ("w:date", date)]
+ x ]
+ | otherwise = do
+ let off x = withTextProp (mknode x [("w:val","0")] ())
+ ((if "csl-no-emph" `elem` classes then off "w:i" else id) .
+ (if "csl-no-strong" `elem` classes then off "w:b" else id) .
+ (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id))
+ $ inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Strong lst) =
+ withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML' opts (Emph lst) =
+ withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML' opts (Subscript lst) =
+ withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML' opts (Superscript lst) =
+ withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML' opts (SmallCaps lst) =
+ withTextProp (mknode "w:smallCaps" [] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML' opts (Strikeout lst) =
+ withTextProp (mknode "w:strike" [] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML' _ LineBreak = return [br]
+inlineToOpenXML' _ il@(RawInline f str)
+ | f == Format "openxml" = return [ x | Elem x <- parseXML str ]
+ | otherwise = do
+ report $ InlineNotRendered il
+ return []
+inlineToOpenXML' opts (Quoted quoteType lst) =
+ inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
+ where (open, close) = case quoteType of
+ SingleQuote -> ("\x2018", "\x2019")
+ DoubleQuote -> ("\x201C", "\x201D")
+inlineToOpenXML' opts (Math mathType str) = do
+ when (mathType == DisplayMath) setFirstPara
+ res <- (lift . lift) (convertMath writeOMML mathType str)
+ case res of
+ Right r -> return [r]
+ Left il -> inlineToOpenXML' opts il
+inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
+inlineToOpenXML' opts (Code attrs str) = do
+ let unhighlighted = intercalate [br] `fmap`
+ (mapM formattedString $ lines str)
+ formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
+ toHlTok (toktype,tok) = mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ rCustomStyle (show toktype) ]
+ , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+ withTextProp (rCustomStyle "VerbatimChar")
+ $ case writerHighlightStyle opts >> highlight formatOpenXML attrs str of
+ Just h -> return h
+ Nothing -> unhighlighted
+inlineToOpenXML' opts (Note bs) = do
+ notes <- gets stFootnotes
+ notenum <- (lift . lift) getUniqueId
+ footnoteStyle <- rStyleM "Footnote Reference"
+ let notemarker = mknode "w:r" []
+ [ mknode "w:rPr" [] footnoteStyle
+ , mknode "w:footnoteRef" [] () ]
+ let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
+ let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
+ insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs
+ insertNoteRef xs = Para [notemarkerXml] : xs
+
+ contents <- local (\env -> env{ envListLevel = -1
+ , envParaProperties = []
+ , envTextProperties = [] })
+ (withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
+ $ insertNoteRef bs)
+ let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
+ modify $ \s -> s{ stFootnotes = newnote : notes }
+ return [ mknode "w:r" []
+ [ mknode "w:rPr" [] footnoteStyle
+ , mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
+-- internal link:
+inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
+ return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
+-- external link:
+inlineToOpenXML' opts (Link _ txt (src,_)) = do
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
+ extlinks <- gets stExternalLinks
+ id' <- case M.lookup src extlinks of
+ Just i -> return i
+ Nothing -> do
+ i <- ("rId"++) `fmap` ((lift . lift) getUniqueId)
+ modify $ \st -> st{ stExternalLinks =
+ M.insert src i extlinks }
+ return i
+ return [ mknode "w:hyperlink" [("r:id",id')] contents ]
+inlineToOpenXML' opts (Image attr alt (src, title)) = do
+ -- first, check to see if we've already done this image
+ pageWidth <- asks envPrintWidth
+ imgs <- gets stImages
+ case M.lookup src imgs of
+ Just (_,_,_,elt,_) -> return [elt]
+ Nothing -> do
+ res <- runExceptT $ lift (P.fetchItem (writerSourceURL opts) src)
+ case res of
+ Left (_ :: PandocError) -> do
+ report $ CouldNotFetchResource src ""
+ -- emit alt text
+ inlinesToOpenXML opts alt
+ Right (img, mt) -> do
+ ident <- ("rId"++) `fmap` ((lift . lift) getUniqueId)
+ let (xpt,ypt) = desiredSizeInPoints opts attr
+ (either (const def) id (imageSize img))
+ -- 12700 emu = 1 pt
+ let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700)
+ let cNvPicPr = mknode "pic:cNvPicPr" [] $
+ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
+ let nvPicPr = mknode "pic:nvPicPr" []
+ [ mknode "pic:cNvPr"
+ [("descr",src),("id","0"),("name","Picture")] ()
+ , cNvPicPr ]
+ let blipFill = mknode "pic:blipFill" []
+ [ mknode "a:blip" [("r:embed",ident)] ()
+ , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
+ let xfrm = mknode "a:xfrm" []
+ [ mknode "a:off" [("x","0"),("y","0")] ()
+ , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
+ let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
+ mknode "a:avLst" [] ()
+ let ln = mknode "a:ln" [("w","9525")]
+ [ mknode "a:noFill" [] ()
+ , mknode "a:headEnd" [] ()
+ , mknode "a:tailEnd" [] () ]
+ let spPr = mknode "pic:spPr" [("bwMode","auto")]
+ [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
+ let graphic = mknode "a:graphic" [] $
+ mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
+ [ mknode "pic:pic" []
+ [ nvPicPr
+ , blipFill
+ , spPr ] ]
+ let imgElt = mknode "w:r" [] $
+ mknode "w:drawing" [] $
+ mknode "wp:inline" []
+ [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+ , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
+ , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] ()
+ , graphic ]
+ let imgext = case mt >>= extensionFromMimeType of
+ Just x -> '.':x
+ Nothing -> case imageType img of
+ Just Png -> ".png"
+ Just Jpeg -> ".jpeg"
+ Just Gif -> ".gif"
+ Just Pdf -> ".pdf"
+ Just Eps -> ".eps"
+ Nothing -> ""
+ if null imgext
+ then -- without an extension there is no rule for content type
+ inlinesToOpenXML opts alt -- return alt to avoid corrupted docx
+ else do
+ let imgpath = "media/" ++ ident ++ imgext
+ let mbMimeType = mt <|> getMimeType imgpath
+ -- insert mime type to use in constructing [Content_Types].xml
+ modify $ \st -> st{ stImages =
+ M.insert src (ident, imgpath, mbMimeType, imgElt, img)
+ $ stImages st }
+ return [imgElt]
+
+br :: Element
+br = breakElement "textWrapping"
+
+breakElement :: String -> Element
+breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ]
+
+-- Word will insert these footnotes into the settings.xml file
+-- (whether or not they're visible in the document). If they're in the
+-- file, but not in the footnotes.xml file, it will produce
+-- problems. So we want to make sure we insert them into our document.
+defaultFootnotes :: [Element]
+defaultFootnotes = [ mknode "w:footnote"
+ [("w:type", "separator"), ("w:id", "-1")] $
+ [ mknode "w:p" [] $
+ [mknode "w:r" [] $
+ [ mknode "w:separator" [] ()]]]
+ , mknode "w:footnote"
+ [("w:type", "continuationSeparator"), ("w:id", "0")] $
+ [ mknode "w:p" [] $
+ [ mknode "w:r" [] $
+ [ mknode "w:continuationSeparator" [] ()]]]]
+
+parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element
+parseXml refArchive distArchive relpath =
+ case findEntryByPath relpath refArchive `mplus`
+ findEntryByPath relpath distArchive of
+ Nothing -> fail $ relpath ++ " missing in reference docx"
+ Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
+ Nothing -> fail $ relpath ++ " corrupt in reference docx"
+ Just d -> return d
+
+-- | Scales the image to fit the page
+-- sizes are passed in emu
+fitToPage :: (Double, Double) -> Integer -> (Integer, Integer)
+fitToPage (x, y) pageWidth
+ -- Fixes width to the page width and scales the height
+ | x > fromIntegral pageWidth =
+ (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y)
+ | otherwise = (floor x, floor y)
+
+withDirection :: PandocMonad m => WS m a -> WS m a
+withDirection x = do
+ isRTL <- asks envRTL
+ paraProps <- asks envParaProperties
+ textProps <- asks envTextProperties
+ -- We want to clean all bidirection (bidi) and right-to-left (rtl)
+ -- properties from the props first. This is because we don't want
+ -- them to stack up.
+ let paraProps' = filter (\e -> (qName . elName) e /= "bidi") paraProps
+ textProps' = filter (\e -> (qName . elName) e /= "rtl") textProps
+ if isRTL
+ -- if we are going right-to-left, we (re?)add the properties.
+ then flip local x $
+ \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps'
+ , envTextProperties = (mknode "w:rtl" [] ()) : textProps'
+ }
+ else flip local x $ \env -> env { envParaProperties = paraProps'
+ , envTextProperties = textProps'
+ }
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
new file mode 100644
index 000000000..79a371d4d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -0,0 +1,522 @@
+{-
+Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.DokuWiki
+ Copyright : Copyright (C) 2008-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Clare Macrae <clare.macrae@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to DokuWiki markup.
+
+DokuWiki: <https://www.dokuwiki.org/dokuwiki>
+-}
+
+{-
+ [ ] Implement nested blockquotes (currently only ever does one level)
+ [x] Implement alignment of text in tables
+ [ ] Implement comments
+ [ ] Work through the Dokuwiki spec, and check I've not missed anything out
+ [ ] Remove dud/duplicate code
+-}
+
+module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options ( WriterOptions(
+ writerTableOfContents
+ , writerTemplate
+ , writerWrapText), WrapOption(..) )
+import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting
+ , camelCaseToHyphenated, trimr, substitute )
+import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates ( renderTemplate' )
+import Data.List ( intersect, intercalate, isPrefixOf, transpose )
+import Data.Default (Default(..))
+import Network.URI ( isURI )
+import Control.Monad ( zipWithM )
+import Control.Monad.State ( modify, State, get, evalState )
+import Control.Monad.Reader ( ReaderT, runReaderT, ask, local )
+import Text.Pandoc.Class (PandocMonad)
+
+data WriterState = WriterState {
+ stNotes :: Bool -- True if there are notes
+ }
+
+data WriterEnvironment = WriterEnvironment {
+ stIndent :: String -- Indent after the marker at the beginning of list items
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell)
+ }
+
+instance Default WriterState where
+ def = WriterState { stNotes = False }
+
+instance Default WriterEnvironment where
+ def = WriterEnvironment { stIndent = ""
+ , stUseTags = False
+ , stBackSlashLB = False }
+
+type DokuWiki = ReaderT WriterEnvironment (State WriterState)
+
+-- | Convert Pandoc to DokuWiki.
+writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeDokuWiki opts document = return $
+ runDokuWiki (pandocToDokuWiki opts document)
+
+runDokuWiki :: DokuWiki a -> a
+runDokuWiki = flip evalState def . flip runReaderT def
+
+-- | Return DokuWiki representation of document.
+pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String
+pandocToDokuWiki opts (Pandoc meta blocks) = do
+ metadata <- metaToJSON opts
+ (fmap trimr . blockListToDokuWiki opts)
+ (inlineListToDokuWiki opts)
+ meta
+ body <- blockListToDokuWiki opts blocks
+ notesExist <- stNotes <$> get
+ let notes = if notesExist
+ then "" -- TODO Was "\n<references />" Check whether I can really remove this:
+ -- if it is definitely to do with footnotes, can remove this whole bit
+ else ""
+ let main = body ++ notes
+ let context = defField "body" main
+ $ defField "toc" (writerTableOfContents opts)
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Escape special characters for DokuWiki.
+escapeString :: String -> String
+escapeString = substitute "__" "%%__%%" .
+ substitute "**" "%%**%%" .
+ substitute "//" "%%//%%"
+
+-- | Convert Pandoc block element to DokuWiki.
+blockToDokuWiki :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> DokuWiki String
+
+blockToDokuWiki _ Null = return ""
+
+blockToDokuWiki opts (Div _attrs bs) = do
+ contents <- blockListToDokuWiki opts bs
+ return $ contents ++ "\n"
+
+blockToDokuWiki opts (Plain inlines) =
+ inlineListToDokuWiki opts inlines
+
+-- title beginning with fig: indicates that the image is a figure
+-- dokuwiki doesn't support captions - so combine together alt and caption into alt
+blockToDokuWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return ""
+ else (" " ++) `fmap` inlineListToDokuWiki opts txt
+ let opt = if null txt
+ then ""
+ else "|" ++ if null tit then capt else tit ++ capt
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI src then "" else ":"
+ return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n"
+
+blockToDokuWiki opts (Para inlines) = do
+ indent <- stIndent <$> ask
+ useTags <- stUseTags <$> ask
+ contents <- inlineListToDokuWiki opts inlines
+ return $ if useTags
+ then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>"
+ else contents ++ if null indent then "\n" else ""
+
+blockToDokuWiki opts (LineBlock lns) =
+ blockToDokuWiki opts $ linesToPara lns
+
+blockToDokuWiki _ (RawBlock f str)
+ | f == Format "dokuwiki" = return str
+ -- See https://www.dokuwiki.org/wiki:syntax
+ -- use uppercase HTML tag for block-level content:
+ | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>"
+ | otherwise = return ""
+
+blockToDokuWiki _ HorizontalRule = return "\n----\n"
+
+blockToDokuWiki opts (Header level _ inlines) = do
+ -- emphasis, links etc. not allowed in headers, apparently,
+ -- so we remove formatting:
+ contents <- inlineListToDokuWiki opts $ removeFormatting inlines
+ let eqs = replicate ( 7 - level ) '='
+ return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
+
+blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
+ let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
+ "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
+ "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
+ "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
+ "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
+ "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
+ "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
+ "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
+ "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
+ return $ "<code" ++
+ (case at of
+ [] -> ">\n"
+ (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>"
+
+blockToDokuWiki opts (BlockQuote blocks) = do
+ contents <- blockListToDokuWiki opts blocks
+ if isSimpleBlockQuote blocks
+ then return $ unlines $ map ("> " ++) $ lines contents
+ else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>"
+
+blockToDokuWiki opts (Table capt aligns _ headers rows) = do
+ captionDoc <- if null capt
+ then return ""
+ else do
+ c <- inlineListToDokuWiki opts capt
+ return $ "" ++ c ++ "\n"
+ headers' <- if all null headers
+ then return []
+ else zipWithM (tableItemToDokuWiki opts) aligns headers
+ rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
+ let widths = map (maximum . map length) $ transpose (headers':rows')
+ let padTo (width, al) s =
+ case (width - length s) of
+ x | x > 0 ->
+ if al == AlignLeft || al == AlignDefault
+ then s ++ replicate x ' '
+ else if al == AlignRight
+ then replicate x ' ' ++ s
+ else replicate (x `div` 2) ' ' ++
+ s ++ replicate (x - x `div` 2) ' '
+ | otherwise -> s
+ let renderRow sep cells = sep ++
+ intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep
+ return $ captionDoc ++
+ (if null headers' then "" else renderRow "^" headers' ++ "\n") ++
+ unlines (map (renderRow "|") rows')
+
+blockToDokuWiki opts x@(BulletList items) = do
+ oldUseTags <- stUseTags <$> ask
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- local (\s -> s { stUseTags = True })
+ (mapM (listItemToDokuWiki opts) items)
+ return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n"
+ else do
+ contents <- local (\s -> s { stIndent = stIndent s ++ " "
+ , stBackSlashLB = backSlash})
+ (mapM (listItemToDokuWiki opts) items)
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+blockToDokuWiki opts x@(OrderedList attribs items) = do
+ oldUseTags <- stUseTags <$> ask
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- local (\s -> s { stUseTags = True })
+ (mapM (orderedListItemToDokuWiki opts) items)
+ return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n"
+ else do
+ contents <- local (\s -> s { stIndent = stIndent s ++ " "
+ , stBackSlashLB = backSlash})
+ (mapM (orderedListItemToDokuWiki opts) items)
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+-- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there
+-- is a specific representation of them.
+-- TODO This creates double '; ; ' if there is a bullet or ordered list inside a definition list
+blockToDokuWiki opts x@(DefinitionList items) = do
+ oldUseTags <- stUseTags <$> ask
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- local (\s -> s { stUseTags = True })
+ (mapM (definitionListItemToDokuWiki opts) items)
+ return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n"
+ else do
+ contents <- local (\s -> s { stIndent = stIndent s ++ " "
+ , stBackSlashLB = backSlash})
+ (mapM (definitionListItemToDokuWiki opts) items)
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet list item (list of blocks) to DokuWiki.
+listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
+listItemToDokuWiki opts items = do
+ contents <- blockListToDokuWiki opts items
+ useTags <- stUseTags <$> ask
+ if useTags
+ then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
+ else do
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let indent' = if backSlash then (drop 2 indent) else indent
+ return $ indent' ++ "* " ++ contents
+
+-- | Convert ordered list item (list of blocks) to DokuWiki.
+-- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki
+orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String
+orderedListItemToDokuWiki opts items = do
+ contents <- blockListToDokuWiki opts items
+ useTags <- stUseTags <$> ask
+ if useTags
+ then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
+ else do
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let indent' = if backSlash then (drop 2 indent) else indent
+ return $ indent' ++ "- " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to DokuWiki.
+definitionListItemToDokuWiki :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> DokuWiki String
+definitionListItemToDokuWiki opts (label, items) = do
+ labelText <- inlineListToDokuWiki opts label
+ contents <- mapM (blockListToDokuWiki opts) items
+ useTags <- stUseTags <$> ask
+ if useTags
+ then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++
+ (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents)
+ else do
+ indent <- stIndent <$> ask
+ backSlash <- stBackSlashLB <$> ask
+ let indent' = if backSlash then (drop 2 indent) else indent
+ return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (num, sty, _) items -> all isSimpleListItem items &&
+ num == 1 && sty `elem` [DefaultStyle, Decimal]
+ DefinitionList items -> all isSimpleListItem $ concatMap snd items
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+isSimpleBlockQuote :: [Block] -> Bool
+isSimpleBlockQuote bs = all isPlainOrPara bs
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+backSlashLineBreaks :: String -> String
+backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs
+ where f '\n' = "\\\\ "
+ f c = [c]
+ g (' ' : '\\':'\\': xs) = xs
+ g s = s
+
+-- Auxiliary functions for tables:
+
+tableItemToDokuWiki :: WriterOptions
+ -> Alignment
+ -> [Block]
+ -> DokuWiki String
+tableItemToDokuWiki opts align' item = do
+ let mkcell x = (if align' == AlignRight || align' == AlignCenter
+ then " "
+ else "") ++ x ++
+ (if align' == AlignLeft || align' == AlignCenter
+ then " "
+ else "")
+ contents <- local (\s -> s { stBackSlashLB = True }) $
+ blockListToDokuWiki opts item
+ return $ mkcell contents
+
+-- | Convert list of Pandoc block elements to DokuWiki.
+blockListToDokuWiki :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> DokuWiki String
+blockListToDokuWiki opts blocks = do
+ backSlash <- stBackSlashLB <$> ask
+ let blocks' = consolidateRawBlocks blocks
+ if backSlash
+ then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks'
+ else vcat <$> mapM (blockToDokuWiki opts) blocks'
+
+consolidateRawBlocks :: [Block] -> [Block]
+consolidateRawBlocks [] = []
+consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs)
+ | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs)
+consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs
+
+-- | Convert list of Pandoc inline elements to DokuWiki.
+inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String
+inlineListToDokuWiki opts lst =
+ concat <$> (mapM (inlineToDokuWiki opts) lst)
+
+-- | Convert Pandoc inline element to DokuWiki.
+inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String
+
+inlineToDokuWiki opts (Span _attrs ils) =
+ inlineListToDokuWiki opts ils
+
+inlineToDokuWiki opts (Emph lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "//" ++ contents ++ "//"
+
+inlineToDokuWiki opts (Strong lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "**" ++ contents ++ "**"
+
+inlineToDokuWiki opts (Strikeout lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<del>" ++ contents ++ "</del>"
+
+inlineToDokuWiki opts (Superscript lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<sup>" ++ contents ++ "</sup>"
+
+inlineToDokuWiki opts (Subscript lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<sub>" ++ contents ++ "</sub>"
+
+inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst
+
+inlineToDokuWiki opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "\8216" ++ contents ++ "\8217"
+
+inlineToDokuWiki opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "\8220" ++ contents ++ "\8221"
+
+inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst
+
+inlineToDokuWiki _ (Code _ str) =
+ -- In dokuwiki, text surrounded by '' is really just a font statement, i.e. <tt>,
+ -- and so other formatting can be present inside.
+ -- However, in pandoc, and markdown, inlined code doesn't contain formatting.
+ -- So I have opted for using %% to disable all formatting inside inline code blocks.
+ -- This gives the best results when converting from other formats to dokuwiki, even if
+ -- the resultand code is a little ugly, for short strings that don't contain formatting
+ -- characters.
+ -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format,
+ -- any formatting inside inlined code blocks would be lost, or presented incorrectly.
+ return $ "''%%" ++ str ++ "%%''"
+
+inlineToDokuWiki _ (Str str) = return $ escapeString str
+
+inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim
+ -- note: str should NOT be escaped
+ where delim = case mathType of
+ DisplayMath -> "$$"
+ InlineMath -> "$"
+
+inlineToDokuWiki _ (RawInline f str)
+ | f == Format "dokuwiki" = return str
+ | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
+ | otherwise = return ""
+
+inlineToDokuWiki _ LineBreak = return "\\\\\n"
+
+inlineToDokuWiki opts SoftBreak =
+ case writerWrapText opts of
+ WrapNone -> return " "
+ WrapAuto -> return " "
+ WrapPreserve -> return "\n"
+
+inlineToDokuWiki _ Space = return " "
+
+inlineToDokuWiki opts (Link _ txt (src, _)) = do
+ label <- inlineListToDokuWiki opts txt
+ case txt of
+ [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
+ | escapeURI s == src -> return src
+ _ -> if isURI src
+ then return $ "[[" ++ src ++ "|" ++ label ++ "]]"
+ else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
+ where src' = case src of
+ '/':xs -> xs -- with leading / it's a
+ _ -> src -- link to a help page
+inlineToDokuWiki opts (Image attr alt (source, tit)) = do
+ alt' <- inlineListToDokuWiki opts alt
+ let txt = case (tit, alt) of
+ ("", []) -> ""
+ ("", _ ) -> "|" ++ alt'
+ (_ , _ ) -> "|" ++ tit
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI source then "" else ":"
+ return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}"
+
+inlineToDokuWiki opts (Note contents) = do
+ contents' <- blockListToDokuWiki opts contents
+ modify (\s -> s { stNotes = True })
+ return $ "((" ++ contents' ++ "))"
+ -- note - may not work for notes with multiple blocks
+
+imageDims :: WriterOptions -> Attr -> String
+imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
+ where
+ toPx = fmap (showInPixel opts) . checkPct
+ checkPct (Just (Percent _)) = Nothing
+ checkPct maybeDim = maybeDim
+ go (Just w) Nothing = "?" ++ w
+ go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
+ go Nothing (Just h) = "?0x" ++ h
+ go Nothing Nothing = ""
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
new file mode 100644
index 000000000..247014c20
--- /dev/null
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -0,0 +1,1257 @@
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-}
+{-
+Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.EPUB
+ Copyright : Copyright (C) 2010-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to EPUB.
+-}
+module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
+import Text.Pandoc.Logging
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Maybe ( fromMaybe, catMaybes )
+import Data.List ( isPrefixOf, isInfixOf, intercalate )
+import Text.Printf (printf)
+import System.FilePath ( takeExtension, takeFileName )
+import Network.HTTP ( urlEncode )
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy.Char8 as B8
+import qualified Text.Pandoc.UTF8 as UTF8
+import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
+import Text.Pandoc.Compat.Time
+import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim
+ , normalizeDate, stringify
+ , hierarchicalize )
+import qualified Text.Pandoc.Shared as S (Element(..))
+import Text.Pandoc.Builder (fromList, setMeta)
+import Text.Pandoc.Options ( WriterOptions(..)
+ , WrapOption(..)
+ , HTMLMathMethod(..)
+ , EPUBVersion(..)
+ , ObfuscationMethod(NoObfuscation) )
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk (walk, walkM, query)
+import Text.Pandoc.UUID (getUUID)
+import Control.Monad.State (modify, get, gets, State, StateT, put, evalState, evalStateT, lift)
+import Control.Monad (mplus, when, zipWithM)
+import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
+ , strContent, lookupAttr, Node(..), QName(..), parseXML
+ , onlyElems, node, ppElement)
+import Text.Pandoc.Writers.HTML ( writeHtmlStringForEPUB )
+import Data.Char ( toLower, isDigit, isAlphaNum )
+import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
+import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
+import Control.Monad.Except (throwError, catchError)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
+
+-- A Chapter includes a list of blocks and maybe a section
+-- number offset. Note, some chapters are unnumbered. The section
+-- number is different from the index number, which will be used
+-- in filenames, chapter0003.xhtml.
+data Chapter = Chapter (Maybe [Int]) [Block]
+
+data EPUBState = EPUBState {
+ stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))]
+ }
+
+type E m = StateT EPUBState m
+
+data EPUBMetadata = EPUBMetadata{
+ epubIdentifier :: [Identifier]
+ , epubTitle :: [Title]
+ , epubDate :: [Date]
+ , epubLanguage :: String
+ , epubCreator :: [Creator]
+ , epubContributor :: [Creator]
+ , epubSubject :: [String]
+ , epubDescription :: Maybe String
+ , epubType :: Maybe String
+ , epubFormat :: Maybe String
+ , epubPublisher :: Maybe String
+ , epubSource :: Maybe String
+ , epubRelation :: Maybe String
+ , epubCoverage :: Maybe String
+ , epubRights :: Maybe String
+ , epubCoverImage :: Maybe String
+ , epubStylesheet :: Maybe Stylesheet
+ , epubPageDirection :: Maybe ProgressionDirection
+ } deriving Show
+
+data Stylesheet = StylesheetPath FilePath
+ | StylesheetContents String
+ deriving Show
+
+data Date = Date{
+ dateText :: String
+ , dateEvent :: Maybe String
+ } deriving Show
+
+data Creator = Creator{
+ creatorText :: String
+ , creatorRole :: Maybe String
+ , creatorFileAs :: Maybe String
+ } deriving Show
+
+data Identifier = Identifier{
+ identifierText :: String
+ , identifierScheme :: Maybe String
+ } deriving Show
+
+data Title = Title{
+ titleText :: String
+ , titleFileAs :: Maybe String
+ , titleType :: Maybe String
+ } deriving Show
+
+data ProgressionDirection = LTR | RTL deriving Show
+
+dcName :: String -> QName
+dcName n = QName n Nothing (Just "dc")
+
+dcNode :: Node t => String -> t -> Element
+dcNode = node . dcName
+
+opfName :: String -> QName
+opfName n = QName n Nothing (Just "opf")
+
+toId :: FilePath -> String
+toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+ then x
+ else '_') . takeFileName
+
+removeNote :: Inline -> Inline
+removeNote (Note _) = Str ""
+removeNote x = x
+
+getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
+getEPUBMetadata opts meta = do
+ let md = metadataFromMeta opts meta
+ let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+ let md' = foldr addMetadataFromXML md elts
+ let addIdentifier m =
+ if null (epubIdentifier m)
+ then do
+ randomId <- (show . getUUID) <$> lift P.newStdGen
+ return $ m{ epubIdentifier = [Identifier randomId Nothing] }
+ else return m
+ let addLanguage m =
+ if null (epubLanguage m)
+ then case lookup "lang" (writerVariables opts) of
+ Just x -> return m{ epubLanguage = x }
+ Nothing -> do
+ mLang <- lift $ P.lookupEnv "LANG"
+ let localeLang =
+ case mLang of
+ Just lang ->
+ map (\c -> if c == '_' then '-' else c) $
+ takeWhile (/='.') lang
+ Nothing -> "en-US"
+ return m{ epubLanguage = localeLang }
+ else return m
+ let fixDate m =
+ if null (epubDate m)
+ then do
+ currentTime <- lift P.getCurrentTime
+ return $ m{ epubDate = [ Date{
+ dateText = showDateTimeISO8601 currentTime
+ , dateEvent = Nothing } ] }
+ else return m
+ let addAuthor m =
+ if any (\c -> creatorRole c == Just "aut") $ epubCreator m
+ then return m
+ else do
+ let authors' = map stringify $ docAuthors meta
+ let toAuthor name = Creator{ creatorText = name
+ , creatorRole = Just "aut"
+ , creatorFileAs = Nothing }
+ return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
+ addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage
+
+addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata
+addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
+ | name == "identifier" = md{ epubIdentifier =
+ Identifier{ identifierText = strContent e
+ , identifierScheme = lookupAttr (opfName "scheme") attrs
+ } : epubIdentifier md }
+ | name == "title" = md{ epubTitle =
+ Title{ titleText = strContent e
+ , titleFileAs = getAttr "file-as"
+ , titleType = getAttr "type"
+ } : epubTitle md }
+ | name == "date" = md{ epubDate =
+ Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e
+ , dateEvent = getAttr "event"
+ } : epubDate md }
+ | name == "language" = md{ epubLanguage = strContent e }
+ | name == "creator" = md{ epubCreator =
+ Creator{ creatorText = strContent e
+ , creatorRole = getAttr "role"
+ , creatorFileAs = getAttr "file-as"
+ } : epubCreator md }
+ | name == "contributor" = md{ epubContributor =
+ Creator { creatorText = strContent e
+ , creatorRole = getAttr "role"
+ , creatorFileAs = getAttr "file-as"
+ } : epubContributor md }
+ | name == "subject" = md{ epubSubject = strContent e : epubSubject md }
+ | name == "description" = md { epubDescription = Just $ strContent e }
+ | name == "type" = md { epubType = Just $ strContent e }
+ | name == "format" = md { epubFormat = Just $ strContent e }
+ | name == "type" = md { epubType = Just $ strContent e }
+ | name == "publisher" = md { epubPublisher = Just $ strContent e }
+ | name == "source" = md { epubSource = Just $ strContent e }
+ | name == "relation" = md { epubRelation = Just $ strContent e }
+ | name == "coverage" = md { epubCoverage = Just $ strContent e }
+ | name == "rights" = md { epubRights = Just $ strContent e }
+ | otherwise = md
+ where getAttr n = lookupAttr (opfName n) attrs
+addMetadataFromXML _ md = md
+
+metaValueToString :: MetaValue -> String
+metaValueToString (MetaString s) = s
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs) = stringify bs
+metaValueToString (MetaBool True) = "true"
+metaValueToString (MetaBool False) = "false"
+metaValueToString _ = ""
+
+getList :: String -> Meta -> (MetaValue -> a) -> [a]
+getList s meta handleMetaValue =
+ case lookupMeta s meta of
+ Just (MetaList xs) -> map handleMetaValue xs
+ Just mv -> [handleMetaValue mv]
+ Nothing -> []
+
+getIdentifier :: Meta -> [Identifier]
+getIdentifier meta = getList "identifier" meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Identifier{ identifierText = maybe "" metaValueToString
+ $ M.lookup "text" m
+ , identifierScheme = metaValueToString <$>
+ M.lookup "scheme" m }
+ handleMetaValue mv = Identifier (metaValueToString mv) Nothing
+
+getTitle :: Meta -> [Title]
+getTitle meta = getList "title" meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m
+ , titleFileAs = metaValueToString <$> M.lookup "file-as" m
+ , titleType = metaValueToString <$> M.lookup "type" m }
+ handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
+
+getCreator :: String -> Meta -> [Creator]
+getCreator s meta = getList s meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
+ , creatorFileAs = metaValueToString <$> M.lookup "file-as" m
+ , creatorRole = metaValueToString <$> M.lookup "role" m }
+ handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
+
+getDate :: String -> Meta -> [Date]
+getDate s meta = getList s meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Date{ dateText = maybe "" id $
+ M.lookup "text" m >>= normalizeDate' . metaValueToString
+ , dateEvent = metaValueToString <$> M.lookup "event" m }
+ handleMetaValue mv = Date { dateText = maybe ""
+ id $ normalizeDate' $ metaValueToString mv
+ , dateEvent = Nothing }
+
+simpleList :: String -> Meta -> [String]
+simpleList s meta =
+ case lookupMeta s meta of
+ Just (MetaList xs) -> map metaValueToString xs
+ Just x -> [metaValueToString x]
+ Nothing -> []
+
+metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
+metadataFromMeta opts meta = EPUBMetadata{
+ epubIdentifier = identifiers
+ , epubTitle = titles
+ , epubDate = date
+ , epubLanguage = language
+ , epubCreator = creators
+ , epubContributor = contributors
+ , epubSubject = subjects
+ , epubDescription = description
+ , epubType = epubtype
+ , epubFormat = format
+ , epubPublisher = publisher
+ , epubSource = source
+ , epubRelation = relation
+ , epubCoverage = coverage
+ , epubRights = rights
+ , epubCoverImage = coverImage
+ , epubStylesheet = stylesheet
+ , epubPageDirection = pageDirection
+ }
+ where identifiers = getIdentifier meta
+ titles = getTitle meta
+ date = getDate "date" meta
+ language = maybe "" metaValueToString $
+ lookupMeta "language" meta `mplus` lookupMeta "lang" meta
+ creators = getCreator "creator" meta
+ contributors = getCreator "contributor" meta
+ subjects = simpleList "subject" meta
+ description = metaValueToString <$> lookupMeta "description" meta
+ epubtype = metaValueToString <$> lookupMeta "type" meta
+ format = metaValueToString <$> lookupMeta "format" meta
+ publisher = metaValueToString <$> lookupMeta "publisher" meta
+ source = metaValueToString <$> lookupMeta "source" meta
+ relation = metaValueToString <$> lookupMeta "relation" meta
+ coverage = metaValueToString <$> lookupMeta "coverage" meta
+ rights = metaValueToString <$> lookupMeta "rights" meta
+ coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
+ (metaValueToString <$> lookupMeta "cover-image" meta)
+ stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
+ ((StylesheetPath . metaValueToString) <$>
+ lookupMeta "stylesheet" meta)
+ pageDirection = case map toLower . metaValueToString <$>
+ lookupMeta "page-progression-direction" meta of
+ Just "ltr" -> Just LTR
+ Just "rtl" -> Just RTL
+ _ -> Nothing
+
+-- | Produce an EPUB2 file from a Pandoc document.
+writeEPUB2 :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeEPUB2 = writeEPUB EPUB2
+
+-- | Produce an EPUB3 file from a Pandoc document.
+writeEPUB3 :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeEPUB3 = writeEPUB EPUB3
+
+-- | Produce an EPUB file from a Pandoc document.
+writeEPUB :: PandocMonad m
+ => EPUBVersion
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeEPUB epubVersion opts doc =
+ let initState = EPUBState { stMediaPaths = []
+ }
+ in
+ evalStateT (pandocToEPUB epubVersion opts doc)
+ initState
+
+pandocToEPUB :: PandocMonad m
+ => EPUBVersion
+ -> WriterOptions
+ -> Pandoc
+ -> E m B.ByteString
+pandocToEPUB version opts doc@(Pandoc meta _) = do
+ let epub3 = version == EPUB3
+ let writeHtml o = fmap UTF8.fromStringLazy .
+ writeHtmlStringForEPUB version o
+ epochtime <- floor <$> lift P.getPOSIXTime
+ let mkEntry path content = toEntry path epochtime content
+ let vars = ("epub3", if epub3 then "true" else "false")
+ : ("css", "stylesheet.css")
+ : writerVariables opts
+ let opts' = opts{ writerEmailObfuscation = NoObfuscation
+ , writerSectionDivs = True
+ , writerVariables = vars
+ , writerHTMLMathMethod =
+ if epub3
+ then MathML
+ else writerHTMLMathMethod opts
+ , writerWrapText = WrapAuto }
+ metadata <- getEPUBMetadata opts' meta
+
+ -- cover page
+ (cpgEntry, cpicEntry) <-
+ case epubCoverImage metadata of
+ Nothing -> return ([],[])
+ Just img -> do
+ let coverImage = "media/" ++ takeFileName img
+ cpContent <- lift $ writeHtml
+ opts'{ writerVariables = ("coverpage","true"):vars }
+ (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+ imgContent <- lift $ P.readFileLazy img
+ return ( [mkEntry "cover.xhtml" cpContent]
+ , [mkEntry coverImage imgContent] )
+
+ -- title page
+ tpContent <- lift $ writeHtml opts'{
+ writerVariables = ("titlepage","true"):vars }
+ (Pandoc meta [])
+ let tpEntry = mkEntry "title_page.xhtml" tpContent
+
+ -- handle pictures
+ -- mediaRef <- P.newIORef []
+ Pandoc _ blocks <- walkM (transformInline opts') doc >>=
+ walkM (transformBlock opts')
+ picEntries <- (catMaybes . map (snd . snd)) <$> (gets stMediaPaths)
+ -- handle fonts
+ let matchingGlob f = do
+ xs <- lift $ P.glob f
+ when (null xs) $
+ report $ CouldNotFetchResource f "glob did not match any font files"
+ return xs
+ let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f)
+ fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
+ fontEntries <- mapM mkFontEntry fontFiles
+
+ -- set page progression direction attribution
+ let progressionDirection = case epubPageDirection metadata of
+ Just LTR | epub3 ->
+ [("page-progression-direction", "ltr")]
+ Just RTL | epub3 ->
+ [("page-progression-direction", "rtl")]
+ _ -> []
+
+ -- body pages
+
+ -- add level 1 header to beginning if none there
+ let blocks' = addIdentifiers
+ $ case blocks of
+ (Header 1 _ _ : _) -> blocks
+ _ -> Header 1 ("",["unnumbered"],[])
+ (docTitle' meta) : blocks
+
+ let chapterHeaderLevel = writerEpubChapterLevel opts
+
+ let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
+ isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
+ n <= chapterHeaderLevel
+ isChapterHeader _ = False
+
+ let toChapters :: [Block] -> State [Int] [Chapter]
+ toChapters [] = return []
+ toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) =
+ toChapters (bs ++ rest)
+ toChapters (Header n attr@(_,classes,_) ils : bs) = do
+ nums <- get
+ mbnum <- if "unnumbered" `elem` classes
+ then return Nothing
+ else case splitAt (n - 1) nums of
+ (ks, (m:_)) -> do
+ let nums' = ks ++ [m+1]
+ put nums'
+ return $ Just (ks ++ [m])
+ -- note, this is the offset not the sec number
+ (ks, []) -> do
+ let nums' = ks ++ [1]
+ put nums'
+ return $ Just ks
+ let (xs,ys) = break isChapterHeader bs
+ (Chapter mbnum (Header n attr ils : xs) :) `fmap` toChapters ys
+ toChapters (b:bs) = do
+ let (xs,ys) = break isChapterHeader bs
+ (Chapter Nothing (b:xs) :) `fmap` toChapters ys
+
+ let chapters' = evalState (toChapters blocks') []
+
+ let extractLinkURL' :: Int -> Inline -> [(String, String)]
+ extractLinkURL' num (Span (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL' _ _ = []
+
+ let extractLinkURL :: Int -> Block -> [(String, String)]
+ extractLinkURL num (Div (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL num (Header _ (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL num b = query (extractLinkURL' num) b
+
+ let reftable = concat $ zipWith (\(Chapter _ bs) num ->
+ query (extractLinkURL num) bs)
+ chapters' [1..]
+
+ let fixInternalReferences :: Inline -> Inline
+ fixInternalReferences (Link attr lab ('#':xs, tit)) =
+ case lookup xs reftable of
+ Just ys -> Link attr lab (ys, tit)
+ Nothing -> Link attr lab ('#':xs, tit)
+ fixInternalReferences x = x
+
+ -- internal reference IDs change when we chunk the file,
+ -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
+ -- this fixes that:
+ let chapters = map (\(Chapter mbnum bs) ->
+ Chapter mbnum $ walk fixInternalReferences bs)
+ chapters'
+
+ let chapToEntry num (Chapter mbnum bs) =
+ mkEntry (showChapter num) <$>
+ (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum }
+ $ case bs of
+ (Header _ _ xs : _) ->
+ -- remove notes or we get doubled footnotes
+ Pandoc (setMeta "title" (walk removeNote $ fromList xs)
+ nullMeta) bs
+ _ ->
+ Pandoc nullMeta bs)
+
+ chapterEntries <- lift $ zipWithM chapToEntry [1..] chapters
+
+ -- incredibly inefficient (TODO):
+ let containsMathML ent = epub3 &&
+ "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+ let containsSVG ent = epub3 &&
+ "<svg" `isInfixOf` (B8.unpack $ fromEntry ent)
+ let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
+
+ -- contents.opf
+ let chapterNode ent = unode "item" !
+ ([("id", toId $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", "application/xhtml+xml")]
+ ++ case props ent of
+ [] -> []
+ xs -> [("properties", unwords xs)])
+ $ ()
+ let chapterRefNode ent = unode "itemref" !
+ [("idref", toId $ eRelativePath ent)] $ ()
+ let pictureNode ent = unode "item" !
+ [("id", toId $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", fromMaybe "application/octet-stream"
+ $ mediaTypeOf $ eRelativePath ent)] $ ()
+ let fontNode ent = unode "item" !
+ [("id", toId $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
+ let plainTitle = case docTitle' meta of
+ [] -> case epubTitle metadata of
+ [] -> "UNTITLED"
+ (x:_) -> titleText x
+ x -> stringify x
+
+ let tocTitle = fromMaybe plainTitle $
+ metaValueToString <$> lookupMeta "toc-title" meta
+ uuid <- case epubIdentifier metadata of
+ (x:_) -> return $ identifierText x -- use first identifier as UUID
+ [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen
+ currentTime <- lift $ P.getCurrentTime
+ let contentsData = UTF8.fromStringLazy $ ppTopElement $
+ unode "package" ! [("version", case version of
+ EPUB2 -> "2.0"
+ EPUB3 -> "3.0")
+ ,("xmlns","http://www.idpf.org/2007/opf")
+ ,("unique-identifier","epub-id-1")] $
+ [ metadataElement version metadata currentTime
+ , unode "manifest" $
+ [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
+ ,("media-type","application/x-dtbncx+xml")] $ ()
+ , unode "item" ! [("id","style"), ("href","stylesheet.css")
+ ,("media-type","text/css")] $ ()
+ , unode "item" ! ([("id","nav")
+ ,("href","nav.xhtml")
+ ,("media-type","application/xhtml+xml")] ++
+ [("properties","nav") | epub3 ]) $ ()
+ ] ++
+ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
+ (case cpicEntry of
+ [] -> []
+ (x:_) -> [add_attrs
+ [Attr (unqual "properties") "cover-image" | epub3]
+ (pictureNode x)]) ++
+ map pictureNode picEntries ++
+ map fontNode fontEntries
+ , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $
+ case epubCoverImage metadata of
+ Nothing -> []
+ Just _ -> [ unode "itemref" !
+ [("idref", "cover_xhtml")] $ () ]
+ ++ ((unode "itemref" ! [("idref", "title_page_xhtml")
+ ,("linear",
+ case lookupMeta "title" meta of
+ Just _ -> "yes"
+ Nothing -> "no")] $ ()) :
+ [unode "itemref" ! [("idref", "nav")] $ ()
+ | writerTableOfContents opts ] ++
+ map chapterRefNode chapterEntries)
+ , unode "guide" $
+ [ unode "reference" !
+ [("type","toc"),("title", tocTitle),
+ ("href","nav.xhtml")] $ ()
+ ] ++
+ [ unode "reference" !
+ [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing
+ ]
+ ]
+ let contentsEntry = mkEntry "content.opf" contentsData
+
+ -- toc.ncx
+ let secs = hierarchicalize blocks'
+
+ let tocLevel = writerTOCDepth opts
+
+ let navPointNode :: PandocMonad m
+ => (Int -> String -> String -> [Element] -> Element)
+ -> S.Element -> StateT Int m Element
+ navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
+ n <- get
+ modify (+1)
+ let showNums :: [Int] -> String
+ showNums = intercalate "." . map show
+ let tit' = stringify ils
+ let tit = if writerNumberSections opts && not (null nums)
+ then showNums nums ++ " " ++ tit'
+ else tit'
+ src <- case lookup ident reftable of
+ Just x -> return x
+ Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable"
+ let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
+ isSec _ = False
+ let subsecs = filter isSec children
+ subs <- mapM (navPointNode formatter) subsecs
+ return $ formatter n tit src subs
+ navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk"
+
+ let navMapFormatter :: Int -> String -> String -> [Element] -> Element
+ navMapFormatter n tit src subs = unode "navPoint" !
+ [("id", "navPoint-" ++ show n)] $
+ [ unode "navLabel" $ unode "text" tit
+ , unode "content" ! [("src", src)] $ ()
+ ] ++ subs
+
+ let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
+ [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
+ , unode "content" ! [("src","title_page.xhtml")] $ () ]
+
+ navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1
+ let tocData = UTF8.fromStringLazy $ ppTopElement $
+ unode "ncx" ! [("version","2005-1")
+ ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
+ [ unode "head" $
+ [ unode "meta" ! [("name","dtb:uid")
+ ,("content", uuid)] $ ()
+ , unode "meta" ! [("name","dtb:depth")
+ ,("content", "1")] $ ()
+ , unode "meta" ! [("name","dtb:totalPageCount")
+ ,("content", "0")] $ ()
+ , unode "meta" ! [("name","dtb:maxPageNumber")
+ ,("content", "0")] $ ()
+ ] ++ case epubCoverImage metadata of
+ Nothing -> []
+ Just img -> [unode "meta" ! [("name","cover"),
+ ("content", toId img)] $ ()]
+ , unode "docTitle" $ unode "text" $ plainTitle
+ , unode "navMap" $
+ tpNode : navMap
+ ]
+ let tocEntry = mkEntry "toc.ncx" tocData
+
+ let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
+ navXhtmlFormatter n tit src subs = unode "li" !
+ [("id", "toc-li-" ++ show n)] $
+ (unode "a" ! [("href",src)]
+ $ tit)
+ : case subs of
+ [] -> []
+ (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
+
+ let navtag = if epub3 then "nav" else "div"
+ tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1
+ let navBlocks = [RawBlock (Format "html") $ ppElement $
+ unode navtag ! ([("epub:type","toc") | epub3] ++
+ [("id","toc")]) $
+ [ unode "h1" ! [("id","toc-title")] $ tocTitle
+ , unode "ol" ! [("class","toc")] $ tocBlocks ]]
+ let landmarks = if epub3
+ then [RawBlock (Format "html") $ ppElement $
+ unode "nav" ! [("epub:type","landmarks")
+ ,("hidden","hidden")] $
+ [ unode "ol" $
+ [ unode "li"
+ [ unode "a" ! [("href", "cover.xhtml")
+ ,("epub:type", "cover")] $
+ "Cover"] |
+ epubCoverImage metadata /= Nothing
+ ] ++
+ [ unode "li"
+ [ unode "a" ! [("href", "#toc")
+ ,("epub:type", "toc")] $
+ "Table of contents"
+ ] | writerTableOfContents opts
+ ]
+ ]
+ ]
+ else []
+ navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars }
+ (Pandoc (setMeta "title"
+ (walk removeNote $ fromList $ docTitle' meta) nullMeta)
+ (navBlocks ++ landmarks))
+ let navEntry = mkEntry "nav.xhtml" navData
+
+ -- mimetype
+ let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
+
+ -- container.xml
+ let containerData = UTF8.fromStringLazy $ ppTopElement $
+ unode "container" ! [("version","1.0")
+ ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
+ unode "rootfiles" $
+ unode "rootfile" ! [("full-path","content.opf")
+ ,("media-type","application/oebps-package+xml")] $ ()
+ let containerEntry = mkEntry "META-INF/container.xml" containerData
+
+ -- com.apple.ibooks.display-options.xml
+ let apple = UTF8.fromStringLazy $ ppTopElement $
+ unode "display_options" $
+ unode "platform" ! [("name","*")] $
+ unode "option" ! [("name","specified-fonts")] $ "true"
+ let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+
+ -- stylesheet
+ stylesheet <- case epubStylesheet metadata of
+ Just (StylesheetPath fp) -> UTF8.toStringLazy <$> (lift $ P.readFileLazy fp)
+ Just (StylesheetContents s) -> return s
+ Nothing -> UTF8.toString `fmap`
+ (lift $ P.readDataFile (writerUserDataDir opts) "epub.css")
+ let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
+
+ -- construct archive
+ let archive = foldr addEntryToArchive emptyArchive
+ (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
+ contentsEntry : tocEntry : navEntry :
+ (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
+ return $ fromArchive archive
+
+metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
+metadataElement version md currentTime =
+ unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
+ where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
+ ++ creatorNodes ++ contributorNodes ++ subjectNodes
+ ++ descriptionNodes ++ typeNodes ++ formatNodes
+ ++ publisherNodes ++ sourceNodes ++ relationNodes
+ ++ coverageNodes ++ rightsNodes ++ coverImageNodes
+ ++ modifiedNodes
+ withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+ ([1..] :: [Int]))
+ identifierNodes = withIds "epub-id" toIdentifierNode $
+ epubIdentifier md
+ titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
+ dateNodes = if version == EPUB2
+ then withIds "epub-date" toDateNode $ epubDate md
+ else -- epub3 allows only one dc:date
+ -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate
+ case epubDate md of
+ [] -> []
+ (x:_) -> [dcNode "date" ! [("id","epub-date")]
+ $ dateText x]
+ languageNodes = [dcTag "language" $ epubLanguage md]
+ creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
+ epubCreator md
+ contributorNodes = withIds "epub-contributor"
+ (toCreatorNode "contributor") $ epubContributor md
+ subjectNodes = map (dcTag "subject") $ epubSubject md
+ descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
+ typeNodes = maybe [] (dcTag' "type") $ epubType md
+ formatNodes = maybe [] (dcTag' "format") $ epubFormat md
+ publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md
+ sourceNodes = maybe [] (dcTag' "source") $ epubSource md
+ relationNodes = maybe [] (dcTag' "relation") $ epubRelation md
+ coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
+ rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
+ coverImageNodes = maybe []
+ (\img -> [unode "meta" ! [("name","cover"),
+ ("content",toId img)] $ ()])
+ $ epubCoverImage md
+ modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
+ (showDateTimeISO8601 currentTime) | version == EPUB3 ]
+ dcTag n s = unode ("dc:" ++ n) s
+ dcTag' n s = [dcTag n s]
+ toIdentifierNode id' (Identifier txt scheme)
+ | version == EPUB2 = [dcNode "identifier" !
+ ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $
+ txt]
+ | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","identifier-type"),
+ ("scheme","onix:codelist5")] $ x])
+ (schemeToOnix `fmap` scheme)
+ toCreatorNode s id' creator
+ | version == EPUB2 = [dcNode s !
+ (("id",id') :
+ maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
+ maybe [] (\x -> [("opf:role",x)])
+ (creatorRole creator >>= toRelator)) $ creatorText creator]
+ | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","file-as")] $ x])
+ (creatorFileAs creator) ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","role"),
+ ("scheme","marc:relators")] $ x])
+ (creatorRole creator >>= toRelator)
+ toTitleNode id' title
+ | version == EPUB2 = [dcNode "title" !
+ (("id",id') :
+ -- note: EPUB2 doesn't accept opf:title-type
+ maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $
+ titleText title]
+ | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
+ ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","file-as")] $ x])
+ (titleFileAs title) ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","title-type")] $ x])
+ (titleType title)
+ toDateNode id' date = [dcNode "date" !
+ (("id",id') :
+ maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
+ dateText date]
+ schemeToOnix "ISBN-10" = "02"
+ schemeToOnix "GTIN-13" = "03"
+ schemeToOnix "UPC" = "04"
+ schemeToOnix "ISMN-10" = "05"
+ schemeToOnix "DOI" = "06"
+ schemeToOnix "LCCN" = "13"
+ schemeToOnix "GTIN-14" = "14"
+ schemeToOnix "ISBN-13" = "15"
+ schemeToOnix "Legal deposit number" = "17"
+ schemeToOnix "URN" = "22"
+ schemeToOnix "OCLC" = "23"
+ schemeToOnix "ISMN-13" = "25"
+ schemeToOnix "ISBN-A" = "26"
+ schemeToOnix "JP" = "27"
+ schemeToOnix "OLCC" = "28"
+ schemeToOnix _ = "01"
+
+showDateTimeISO8601 :: UTCTime -> String
+showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+
+transformTag :: PandocMonad m
+ => WriterOptions
+ -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
+ -> Tag String
+ -> E m (Tag String)
+transformTag opts tag@(TagOpen name attr)
+ | name `elem` ["video", "source", "img", "audio"] &&
+ lookup "data-external" attr == Nothing = do
+ let src = fromAttrib "src" tag
+ let poster = fromAttrib "poster" tag
+ newsrc <- modifyMediaRef opts src
+ newposter <- modifyMediaRef opts poster
+ let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
+ [("src", newsrc) | not (null newsrc)] ++
+ [("poster", newposter) | not (null newposter)]
+ return $ TagOpen name attr'
+transformTag _ tag = return tag
+
+modifyMediaRef :: PandocMonad m
+ => WriterOptions
+ -> FilePath
+ -> E m FilePath
+modifyMediaRef _ "" = return ""
+modifyMediaRef opts oldsrc = do
+ media <- gets stMediaPaths
+ case lookup oldsrc media of
+ Just (n,_) -> return n
+ Nothing -> catchError
+ (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc
+ let new = "media/file" ++ show (length media) ++
+ fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
+ (('.':) <$> (mbMime >>= extensionFromMimeType))
+ epochtime <- floor `fmap` lift P.getPOSIXTime
+ let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
+ modify $ \st -> st{ stMediaPaths =
+ (oldsrc, (new, Just entry)):media}
+ return new)
+ (\e -> do
+ report $ CouldNotFetchResource oldsrc (show e)
+ return oldsrc)
+
+transformBlock :: PandocMonad m
+ => WriterOptions
+ -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
+ -> Block
+ -> E m Block
+transformBlock opts (RawBlock fmt raw)
+ | fmt == Format "html" = do
+ let tags = parseTags raw
+ tags' <- mapM (transformTag opts) tags
+ return $ RawBlock fmt (renderTags' tags')
+transformBlock _ b = return b
+
+transformInline :: PandocMonad m
+ => WriterOptions
+ -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
+ -> Inline
+ -> E m Inline
+transformInline opts (Image attr lab (src,tit)) = do
+ newsrc <- modifyMediaRef opts src
+ return $ Image attr lab (newsrc, tit)
+transformInline opts (x@(Math t m))
+ | WebTeX url <- writerHTMLMathMethod opts = do
+ newsrc <- modifyMediaRef opts (url ++ urlEncode m)
+ let mathclass = if t == DisplayMath then "display" else "inline"
+ return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")]
+transformInline opts (RawInline fmt raw)
+ | fmt == Format "html" = do
+ let tags = parseTags raw
+ tags' <- mapM (transformTag opts) tags
+ return $ RawInline fmt (renderTags' tags')
+transformInline _ x = return x
+
+(!) :: (t -> Element) -> [(String, String)] -> t -> Element
+(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
+
+-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
+ppTopElement :: Element -> String
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+ -- unEntity removes numeric entities introduced by ppElement
+ -- (kindlegen seems to choke on these).
+ where unEntity [] = ""
+ unEntity ('&':'#':xs) =
+ let (ds,ys) = break (==';') xs
+ rest = drop 1 ys
+ in case safeRead ('\'':'\\':ds ++ "'") of
+ Just x -> x : unEntity rest
+ Nothing -> '&':'#':unEntity xs
+ unEntity (x:xs) = x : unEntity xs
+
+mediaTypeOf :: FilePath -> Maybe MimeType
+mediaTypeOf x =
+ let mediaPrefixes = ["image", "video", "audio"] in
+ case getMimeType x of
+ Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
+ _ -> Nothing
+
+-- Returns filename for chapter number.
+showChapter :: Int -> String
+showChapter = printf "ch%03d.xhtml"
+
+-- Add identifiers to any headers without them.
+addIdentifiers :: [Block] -> [Block]
+addIdentifiers bs = evalState (mapM go bs) Set.empty
+ where go (Header n (ident,classes,kvs) ils) = do
+ ids <- get
+ let ident' = if null ident
+ then uniqueIdent ils ids
+ else ident
+ modify $ Set.insert ident'
+ return $ Header n (ident',classes,kvs) ils
+ go x = return x
+
+-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
+normalizeDate' :: String -> Maybe String
+normalizeDate' xs =
+ let xs' = trim xs in
+ case xs' of
+ [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY
+ [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM
+ -> Just xs'
+ _ -> normalizeDate xs'
+
+toRelator :: String -> Maybe String
+toRelator x
+ | x `elem` relators = Just x
+ | otherwise = lookup (map toLower x) relatorMap
+
+relators :: [String]
+relators = map snd relatorMap
+
+relatorMap :: [(String, String)]
+relatorMap =
+ [("abridger", "abr")
+ ,("actor", "act")
+ ,("adapter", "adp")
+ ,("addressee", "rcp")
+ ,("analyst", "anl")
+ ,("animator", "anm")
+ ,("annotator", "ann")
+ ,("appellant", "apl")
+ ,("appellee", "ape")
+ ,("applicant", "app")
+ ,("architect", "arc")
+ ,("arranger", "arr")
+ ,("art copyist", "acp")
+ ,("art director", "adi")
+ ,("artist", "art")
+ ,("artistic director", "ard")
+ ,("assignee", "asg")
+ ,("associated name", "asn")
+ ,("attributed name", "att")
+ ,("auctioneer", "auc")
+ ,("author", "aut")
+ ,("author in quotations or text abstracts", "aqt")
+ ,("author of afterword, colophon, etc.", "aft")
+ ,("author of dialog", "aud")
+ ,("author of introduction, etc.", "aui")
+ ,("autographer", "ato")
+ ,("bibliographic antecedent", "ant")
+ ,("binder", "bnd")
+ ,("binding designer", "bdd")
+ ,("blurb writer", "blw")
+ ,("book designer", "bkd")
+ ,("book producer", "bkp")
+ ,("bookjacket designer", "bjd")
+ ,("bookplate designer", "bpd")
+ ,("bookseller", "bsl")
+ ,("braille embosser", "brl")
+ ,("broadcaster", "brd")
+ ,("calligrapher", "cll")
+ ,("cartographer", "ctg")
+ ,("caster", "cas")
+ ,("censor", "cns")
+ ,("choreographer", "chr")
+ ,("cinematographer", "cng")
+ ,("client", "cli")
+ ,("collection registrar", "cor")
+ ,("collector", "col")
+ ,("collotyper", "clt")
+ ,("colorist", "clr")
+ ,("commentator", "cmm")
+ ,("commentator for written text", "cwt")
+ ,("compiler", "com")
+ ,("complainant", "cpl")
+ ,("complainant-appellant", "cpt")
+ ,("complainant-appellee", "cpe")
+ ,("composer", "cmp")
+ ,("compositor", "cmt")
+ ,("conceptor", "ccp")
+ ,("conductor", "cnd")
+ ,("conservator", "con")
+ ,("consultant", "csl")
+ ,("consultant to a project", "csp")
+ ,("contestant", "cos")
+ ,("contestant-appellant", "cot")
+ ,("contestant-appellee", "coe")
+ ,("contestee", "cts")
+ ,("contestee-appellant", "ctt")
+ ,("contestee-appellee", "cte")
+ ,("contractor", "ctr")
+ ,("contributor", "ctb")
+ ,("copyright claimant", "cpc")
+ ,("copyright holder", "cph")
+ ,("corrector", "crr")
+ ,("correspondent", "crp")
+ ,("costume designer", "cst")
+ ,("court governed", "cou")
+ ,("court reporter", "crt")
+ ,("cover designer", "cov")
+ ,("creator", "cre")
+ ,("curator", "cur")
+ ,("dancer", "dnc")
+ ,("data contributor", "dtc")
+ ,("data manager", "dtm")
+ ,("dedicatee", "dte")
+ ,("dedicator", "dto")
+ ,("defendant", "dfd")
+ ,("defendant-appellant", "dft")
+ ,("defendant-appellee", "dfe")
+ ,("degree granting institution", "dgg")
+ ,("delineator", "dln")
+ ,("depicted", "dpc")
+ ,("depositor", "dpt")
+ ,("designer", "dsr")
+ ,("director", "drt")
+ ,("dissertant", "dis")
+ ,("distribution place", "dbp")
+ ,("distributor", "dst")
+ ,("donor", "dnr")
+ ,("draftsman", "drm")
+ ,("dubious author", "dub")
+ ,("editor", "edt")
+ ,("editor of compilation", "edc")
+ ,("editor of moving image work", "edm")
+ ,("electrician", "elg")
+ ,("electrotyper", "elt")
+ ,("enacting jurisdiction", "enj")
+ ,("engineer", "eng")
+ ,("engraver", "egr")
+ ,("etcher", "etr")
+ ,("event place", "evp")
+ ,("expert", "exp")
+ ,("facsimilist", "fac")
+ ,("field director", "fld")
+ ,("film director", "fmd")
+ ,("film distributor", "fds")
+ ,("film editor", "flm")
+ ,("film producer", "fmp")
+ ,("filmmaker", "fmk")
+ ,("first party", "fpy")
+ ,("forger", "frg")
+ ,("former owner", "fmo")
+ ,("funder", "fnd")
+ ,("geographic information specialist", "gis")
+ ,("honoree", "hnr")
+ ,("host", "hst")
+ ,("host institution", "his")
+ ,("illuminator", "ilu")
+ ,("illustrator", "ill")
+ ,("inscriber", "ins")
+ ,("instrumentalist", "itr")
+ ,("interviewee", "ive")
+ ,("interviewer", "ivr")
+ ,("inventor", "inv")
+ ,("issuing body", "isb")
+ ,("judge", "jud")
+ ,("jurisdiction governed", "jug")
+ ,("laboratory", "lbr")
+ ,("laboratory director", "ldr")
+ ,("landscape architect", "lsa")
+ ,("lead", "led")
+ ,("lender", "len")
+ ,("libelant", "lil")
+ ,("libelant-appellant", "lit")
+ ,("libelant-appellee", "lie")
+ ,("libelee", "lel")
+ ,("libelee-appellant", "let")
+ ,("libelee-appellee", "lee")
+ ,("librettist", "lbt")
+ ,("licensee", "lse")
+ ,("licensor", "lso")
+ ,("lighting designer", "lgd")
+ ,("lithographer", "ltg")
+ ,("lyricist", "lyr")
+ ,("manufacture place", "mfp")
+ ,("manufacturer", "mfr")
+ ,("marbler", "mrb")
+ ,("markup editor", "mrk")
+ ,("metadata contact", "mdc")
+ ,("metal-engraver", "mte")
+ ,("moderator", "mod")
+ ,("monitor", "mon")
+ ,("music copyist", "mcp")
+ ,("musical director", "msd")
+ ,("musician", "mus")
+ ,("narrator", "nrt")
+ ,("onscreen presenter", "osp")
+ ,("opponent", "opn")
+ ,("organizer of meeting", "orm")
+ ,("originator", "org")
+ ,("other", "oth")
+ ,("owner", "own")
+ ,("panelist", "pan")
+ ,("papermaker", "ppm")
+ ,("patent applicant", "pta")
+ ,("patent holder", "pth")
+ ,("patron", "pat")
+ ,("performer", "prf")
+ ,("permitting agency", "pma")
+ ,("photographer", "pht")
+ ,("plaintiff", "ptf")
+ ,("plaintiff-appellant", "ptt")
+ ,("plaintiff-appellee", "pte")
+ ,("platemaker", "plt")
+ ,("praeses", "pra")
+ ,("presenter", "pre")
+ ,("printer", "prt")
+ ,("printer of plates", "pop")
+ ,("printmaker", "prm")
+ ,("process contact", "prc")
+ ,("producer", "pro")
+ ,("production company", "prn")
+ ,("production designer", "prs")
+ ,("production manager", "pmn")
+ ,("production personnel", "prd")
+ ,("production place", "prp")
+ ,("programmer", "prg")
+ ,("project director", "pdr")
+ ,("proofreader", "pfr")
+ ,("provider", "prv")
+ ,("publication place", "pup")
+ ,("publisher", "pbl")
+ ,("publishing director", "pbd")
+ ,("puppeteer", "ppt")
+ ,("radio director", "rdd")
+ ,("radio producer", "rpc")
+ ,("recording engineer", "rce")
+ ,("recordist", "rcd")
+ ,("redaktor", "red")
+ ,("renderer", "ren")
+ ,("reporter", "rpt")
+ ,("repository", "rps")
+ ,("research team head", "rth")
+ ,("research team member", "rtm")
+ ,("researcher", "res")
+ ,("respondent", "rsp")
+ ,("respondent-appellant", "rst")
+ ,("respondent-appellee", "rse")
+ ,("responsible party", "rpy")
+ ,("restager", "rsg")
+ ,("restorationist", "rsr")
+ ,("reviewer", "rev")
+ ,("rubricator", "rbr")
+ ,("scenarist", "sce")
+ ,("scientific advisor", "sad")
+ ,("screenwriter", "aus")
+ ,("scribe", "scr")
+ ,("sculptor", "scl")
+ ,("second party", "spy")
+ ,("secretary", "sec")
+ ,("seller", "sll")
+ ,("set designer", "std")
+ ,("setting", "stg")
+ ,("signer", "sgn")
+ ,("singer", "sng")
+ ,("sound designer", "sds")
+ ,("speaker", "spk")
+ ,("sponsor", "spn")
+ ,("stage director", "sgd")
+ ,("stage manager", "stm")
+ ,("standards body", "stn")
+ ,("stereotyper", "str")
+ ,("storyteller", "stl")
+ ,("supporting host", "sht")
+ ,("surveyor", "srv")
+ ,("teacher", "tch")
+ ,("technical director", "tcd")
+ ,("television director", "tld")
+ ,("television producer", "tlp")
+ ,("thesis advisor", "ths")
+ ,("transcriber", "trc")
+ ,("translator", "trl")
+ ,("type designer", "tyd")
+ ,("typographer", "tyg")
+ ,("university place", "uvp")
+ ,("videographer", "vdg")
+ ,("witness", "wit")
+ ,("wood engraver", "wde")
+ ,("woodcutter", "wdc")
+ ,("writer of accompanying material", "wam")
+ ,("writer of added commentary", "wac")
+ ,("writer of added lyrics", "wal")
+ ,("writer of added text", "wat")
+ ]
+
+docTitle' :: Meta -> [Inline]
+docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta
+ where go (MetaString s) = [Str s]
+ go (MetaInlines xs) = xs
+ go (MetaBlocks [Para xs]) = xs
+ go (MetaBlocks [Plain xs]) = xs
+ go (MetaMap m) =
+ case M.lookup "type" m of
+ Just x | stringify x == "main" ->
+ maybe [] go $ M.lookup "text" m
+ _ -> []
+ go (MetaList xs) = concatMap go xs
+ go _ = []
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
new file mode 100644
index 000000000..967fe6a4c
--- /dev/null
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -0,0 +1,617 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (c) 2011-2012, Sergey Astanin
+All rights reserved.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format.
+
+FictionBook is an XML-based e-book format. For more information see:
+<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1>
+
+-}
+module Text.Pandoc.Writers.FB2 (writeFB2) where
+
+import Control.Monad.State (StateT, evalStateT, get, modify, lift)
+import Control.Monad.State (liftM)
+import Data.ByteString.Base64 (encode)
+import Data.Char (toLower, isSpace, isAscii, isControl)
+import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix)
+import Data.Either (lefts, rights)
+import Network.HTTP (urlEncode)
+import Network.URI (isURI)
+import Text.XML.Light
+import qualified Text.XML.Light as X
+import qualified Text.XML.Light.Cursor as XC
+import qualified Data.ByteString.Char8 as B8
+import Control.Monad.Except (throwError, catchError)
+
+import Text.Pandoc.Logging
+import Text.Pandoc.Definition
+import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize,
+ linesToPara)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
+
+-- | Data to be written at the end of the document:
+-- (foot)notes, URLs, references, images.
+data FbRenderState = FbRenderState
+ { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text
+ , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path
+ , parentListMarker :: String -- ^ list marker of the parent ordered list
+ , parentBulletLevel :: Int -- ^ nesting level of the unordered list
+ , writerOptions :: WriterOptions
+ } deriving (Show)
+
+-- | FictionBook building monad.
+type FBM m = StateT FbRenderState m
+
+newFB :: FbRenderState
+newFB = FbRenderState { footnotes = [], imagesToFetch = []
+ , parentListMarker = "", parentBulletLevel = 0
+ , writerOptions = def }
+
+data ImageMode = NormalImage | InlineImage deriving (Eq)
+instance Show ImageMode where
+ show NormalImage = "imageType"
+ show InlineImage = "inlineImageType"
+
+-- | Produce an FB2 document from a 'Pandoc' document.
+writeFB2 :: PandocMonad m
+ => WriterOptions -- ^ conversion options
+ -> Pandoc -- ^ document to convert
+ -> m String -- ^ FictionBook2 document (not encoded yet)
+writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc
+
+pandocToFB2 :: PandocMonad m
+ => WriterOptions
+ -> Pandoc
+ -> FBM m String
+pandocToFB2 opts (Pandoc meta blocks) = do
+ modify (\s -> s { writerOptions = opts })
+ desc <- description meta
+ fp <- frontpage meta
+ secs <- renderSections 1 blocks
+ let body = el "body" $ fp ++ secs
+ notes <- renderFootnotes
+ (imgs,missing) <- liftM imagesToFetch get >>= \s -> lift (fetchImages s)
+ let body' = replaceImagesWithAlt missing body
+ let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
+ return $ xml_head ++ (showContent fb2_xml) ++ "\n"
+ where
+ xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ fb2_attrs =
+ let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0"
+ xlink = "http://www.w3.org/1999/xlink"
+ in [ uattr "xmlns" xmlns
+ , attr ("xmlns", "l") xlink ]
+
+frontpage :: PandocMonad m => Meta -> FBM m [Content]
+frontpage meta' = do
+ t <- cMapM toXml . docTitle $ meta'
+ return $
+ [ el "title" (el "p" t)
+ , el "annotation" (map (el "p" . cMap plain)
+ (docAuthors meta' ++ [docDate meta']))
+ ]
+
+description :: PandocMonad m => Meta -> FBM m Content
+description meta' = do
+ bt <- booktitle meta'
+ let as = authors meta'
+ dd <- docdate meta'
+ return $ el "description"
+ [ el "title-info" (bt ++ as ++ dd)
+ , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version
+ ]
+
+booktitle :: PandocMonad m => Meta -> FBM m [Content]
+booktitle meta' = do
+ t <- cMapM toXml . docTitle $ meta'
+ return $ if null t
+ then []
+ else [ el "book-title" t ]
+
+authors :: Meta -> [Content]
+authors meta' = cMap author (docAuthors meta')
+
+author :: [Inline] -> [Content]
+author ss =
+ let ws = words . cMap plain $ ss
+ email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws)
+ ws' = filter ('@' `notElem`) ws
+ names = case ws' of
+ (nickname:[]) -> [ el "nickname" nickname ]
+ (fname:lname:[]) -> [ el "first-name" fname
+ , el "last-name" lname ]
+ (fname:rest) -> [ el "first-name" fname
+ , el "middle-name" (concat . init $ rest)
+ , el "last-name" (last rest) ]
+ ([]) -> []
+ in list $ el "author" (names ++ email)
+
+docdate :: PandocMonad m => Meta -> FBM m [Content]
+docdate meta' = do
+ let ss = docDate meta'
+ d <- cMapM toXml ss
+ return $ if null d
+ then []
+ else [el "date" d]
+
+-- | Divide the stream of blocks into sections and convert to XML
+-- representation.
+renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content]
+renderSections level blocks = do
+ let secs = splitSections level blocks
+ mapM (renderSection level) secs
+
+renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content
+renderSection level (ttl, body) = do
+ title <- if null ttl
+ then return []
+ else return . list . el "title" . formatTitle $ ttl
+ content <- if (hasSubsections body)
+ then renderSections (level + 1) body
+ else cMapM blockToXml body
+ return $ el "section" (title ++ content)
+ where
+ hasSubsections = any isHeaderBlock
+
+-- | Only <p> and <empty-line> are allowed within <title> in FB2.
+formatTitle :: [Inline] -> [Content]
+formatTitle inlines =
+ let lns = split isLineBreak inlines
+ lns' = map (el "p" . cMap plain) lns
+ in intersperse (el "empty-line" ()) lns'
+
+split :: (a -> Bool) -> [a] -> [[a]]
+split _ [] = []
+split cond xs = let (b,a) = break cond xs
+ in (b:split cond (drop 1 a))
+
+isLineBreak :: Inline -> Bool
+isLineBreak LineBreak = True
+isLineBreak _ = False
+
+-- | Divide the stream of block elements into sections: [(title, blocks)].
+splitSections :: Int -> [Block] -> [([Inline], [Block])]
+splitSections level blocks = reverse $ revSplit (reverse blocks)
+ where
+ revSplit [] = []
+ revSplit rblocks =
+ let (lastsec, before) = break sameLevel rblocks
+ (header, prevblocks) =
+ case before of
+ ((Header n _ title):prevblocks') ->
+ if n == level
+ then (title, prevblocks')
+ else ([], before)
+ _ -> ([], before)
+ in (header, reverse lastsec) : revSplit prevblocks
+ sameLevel (Header n _ _) = n == level
+ sameLevel _ = False
+
+-- | Make another FictionBook body with footnotes.
+renderFootnotes :: PandocMonad m => FBM m [Content]
+renderFootnotes = do
+ fns <- footnotes `liftM` get
+ if null fns
+ then return [] -- no footnotes
+ else return . list $
+ el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
+ where
+ renderFN (n, idstr, cs) =
+ let fn_texts = (el "title" (el "p" (show n))) : cs
+ in el "section" ([uattr "id" idstr], fn_texts)
+
+-- | Fetch images and encode them for the FictionBook XML.
+-- Return image data and a list of hrefs of the missing images.
+fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String])
+fetchImages links = do
+ imgs <- mapM (uncurry fetchImage) links
+ return $ (rights imgs, lefts imgs)
+
+-- | Fetch image data from disk or from network and make a <binary> XML section.
+-- Return either (Left hrefOfMissingImage) or (Right xmlContent).
+fetchImage :: PandocMonad m => String -> String -> m (Either String Content)
+fetchImage href link = do
+ mbimg <-
+ case (isURI link, readDataURI link) of
+ (True, Just (mime,_,True,base64)) ->
+ let mime' = map toLower mime
+ in if mime' == "image/png" || mime' == "image/jpeg"
+ then return (Just (mime',base64))
+ else return Nothing
+ (True, Just _) -> return Nothing -- not base64-encoded
+ _ -> do
+ catchError (do (bs, mbmime) <- P.fetchItem Nothing link
+ case mbmime of
+ Nothing -> do
+ report $ CouldNotDetermineMimeType link
+ return Nothing
+ Just mime -> return $ Just (mime,
+ B8.unpack $ encode bs))
+ (\e ->
+ do report $ CouldNotFetchResource link (show e)
+ return Nothing)
+ case mbimg of
+ Just (imgtype, imgdata) -> do
+ return . Right $ el "binary"
+ ( [uattr "id" href
+ , uattr "content-type" imgtype]
+ , txt imgdata )
+ _ -> return (Left ('#':href))
+
+
+-- | Extract mime type and encoded data from the Data URI.
+readDataURI :: String -- ^ URI
+ -> Maybe (String,String,Bool,String)
+ -- ^ Maybe (mime,charset,isBase64,data)
+readDataURI uri =
+ case stripPrefix "data:" uri of
+ Nothing -> Nothing
+ Just rest ->
+ let meta = takeWhile (/= ',') rest -- without trailing ','
+ uridata = drop (length meta + 1) rest
+ parts = split (== ';') meta
+ (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
+ in Just (mime,cs,enc,uridata)
+
+ where
+ upd str m@(mime,cs,enc)
+ | isMimeType str = (str,cs,enc)
+ | Just str' <- stripPrefix "charset=" str = (mime,str',enc)
+ | str == "base64" = (mime,cs,True)
+ | otherwise = m
+
+-- Without parameters like ;charset=...; see RFC 2045, 5.1
+isMimeType :: String -> Bool
+isMimeType s =
+ case split (=='/') s of
+ [mtype,msubtype] ->
+ ((map toLower mtype) `elem` types
+ || "x-" `isPrefixOf` (map toLower mtype))
+ && all valid mtype
+ && all valid msubtype
+ _ -> False
+ where
+ types = ["text","image","audio","video","application","message","multipart"]
+ valid c = isAscii c && not (isControl c) && not (isSpace c) &&
+ c `notElem` "()<>@,;:\\\"/[]?="
+
+footnoteID :: Int -> String
+footnoteID i = "n" ++ (show i)
+
+linkID :: Int -> String
+linkID i = "l" ++ (show i)
+
+-- | Convert a block-level Pandoc's element to FictionBook XML representation.
+blockToXml :: PandocMonad m => Block -> FBM m [Content]
+blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2
+blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
+-- title beginning with fig: indicates that the image is a figure
+blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) =
+ insertImage NormalImage (Image atr alt (src,tit))
+blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss
+blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
+ map (el "p" . el "code") . lines $ s
+blockToXml b@(RawBlock _ _) = do
+ report $ BlockNotRendered b
+ return []
+blockToXml (Div _ bs) = cMapM blockToXml bs
+blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs
+blockToXml (LineBlock lns) = blockToXml $ linesToPara lns
+blockToXml (OrderedList a bss) = do
+ state <- get
+ let pmrk = parentListMarker state
+ let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a
+ let mkitem mrk bs = do
+ modify (\s -> s { parentListMarker = mrk })
+ itemtext <- cMapM blockToXml . paraToPlain $ bs
+ modify (\s -> s { parentListMarker = pmrk }) -- old parent marker
+ return . el "p" $ [ txt mrk, txt " " ] ++ itemtext
+ mapM (uncurry mkitem) (zip markers bss)
+blockToXml (BulletList bss) = do
+ state <- get
+ let level = parentBulletLevel state
+ let pmrk = parentListMarker state
+ let prefix = replicate (length pmrk) ' '
+ let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"]
+ let mrk = prefix ++ bullets !! (level `mod` (length bullets))
+ let mkitem bs = do
+ modify (\s -> s { parentBulletLevel = (level+1) })
+ itemtext <- cMapM blockToXml . paraToPlain $ bs
+ modify (\s -> s { parentBulletLevel = level }) -- restore bullet level
+ return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext
+ mapM mkitem bss
+blockToXml (DefinitionList defs) =
+ cMapM mkdef defs
+ where
+ mkdef (term, bss) = do
+ def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss
+ t <- wrap "strong" term
+ return [ el "p" t, el "p" def' ]
+ sep blocks =
+ if all needsBreak blocks then
+ blocks ++ [Plain [LineBreak]]
+ else
+ blocks
+ needsBreak (Para _) = False
+ needsBreak (Plain ins) = LineBreak `notElem` ins
+ needsBreak _ = True
+blockToXml (Header _ _ _) = -- should never happen, see renderSections
+ throwError $ PandocShouldNeverHappenError "unexpected header in section text"
+blockToXml HorizontalRule = return
+ [ el "empty-line" ()
+ , el "p" (txt (replicate 10 '—'))
+ , el "empty-line" () ]
+blockToXml (Table caption aligns _ headers rows) = do
+ hd <- mkrow "th" headers aligns
+ bd <- mapM (\r -> mkrow "td" r aligns) rows
+ c <- return . el "emphasis" =<< cMapM toXml caption
+ return [el "table" (hd : bd), el "p" c]
+ where
+ mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content
+ mkrow tag cells aligns' =
+ (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns'))
+ --
+ mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content
+ mkcell tag (cell, align) = do
+ cblocks <- cMapM blockToXml cell
+ return $ el tag ([align_attr align], cblocks)
+ --
+ align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
+ align_str AlignLeft = "left"
+ align_str AlignCenter = "center"
+ align_str AlignRight = "right"
+ align_str AlignDefault = "left"
+blockToXml Null = return []
+
+-- Replace paragraphs with plain text and line break.
+-- Necessary to simulate multi-paragraph lists in FB2.
+paraToPlain :: [Block] -> [Block]
+paraToPlain [] = []
+paraToPlain (Para inlines : rest) =
+ let p = (Plain (inlines ++ [LineBreak]))
+ in p : paraToPlain rest
+paraToPlain (p:rest) = p : paraToPlain rest
+
+-- Simulate increased indentation level. Will not really work
+-- for multi-line paragraphs.
+indent :: Block -> Block
+indent = indentBlock
+ where
+ -- indentation space
+ spacer :: String
+ spacer = replicate 4 ' '
+ --
+ indentBlock (Plain ins) = Plain ((Str spacer):ins)
+ indentBlock (Para ins) = Para ((Str spacer):ins)
+ indentBlock (CodeBlock a s) =
+ let s' = unlines . map (spacer++) . lines $ s
+ in CodeBlock a s'
+ indentBlock (BlockQuote bs) = BlockQuote (map indent bs)
+ indentBlock (Header l attr' ins) = Header l attr' (indentLines ins)
+ indentBlock everythingElse = everythingElse
+ -- indent every (explicit) line
+ indentLines :: [Inline] -> [Inline]
+ indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
+ in intercalate [LineBreak] $ map ((Str spacer):) lns
+
+-- | Convert a Pandoc's Inline element to FictionBook XML representation.
+toXml :: PandocMonad m => Inline -> FBM m [Content]
+toXml (Str s) = return [txt s]
+toXml (Span _ ils) = cMapM toXml ils
+toXml (Emph ss) = list `liftM` wrap "emphasis" ss
+toXml (Strong ss) = list `liftM` wrap "strong" ss
+toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
+toXml (Superscript ss) = list `liftM` wrap "sup" ss
+toXml (Subscript ss) = list `liftM` wrap "sub" ss
+toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
+toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
+ inner <- cMapM toXml ss
+ return $ [txt "‘"] ++ inner ++ [txt "’"]
+toXml (Quoted DoubleQuote ss) = do
+ inner <- cMapM toXml ss
+ return $ [txt "“"] ++ inner ++ [txt "”"]
+toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles
+toXml (Code _ s) = return [el "code" s]
+toXml Space = return [txt " "]
+toXml SoftBreak = return [txt " "]
+toXml LineBreak = return [el "empty-line" ()]
+toXml (Math _ formula) = insertMath InlineImage formula
+toXml il@(RawInline _ _) = do
+ report $ InlineNotRendered il
+ return [] -- raw TeX and raw HTML are suppressed
+toXml (Link _ text (url,ttl)) = do
+ fns <- footnotes `liftM` get
+ let n = 1 + length fns
+ let ln_id = linkID n
+ let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]"
+ ln_text <- cMapM toXml text
+ let ln_desc =
+ let ttl' = dropWhile isSpace ttl
+ in if null ttl'
+ then list . el "p" $ el "code" url
+ else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ]
+ modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns })
+ return $ ln_text ++
+ [ el "a"
+ ( [ attr ("l","href") ('#':ln_id)
+ , uattr "type" "note" ]
+ , ln_ref) ]
+toXml img@(Image _ _ _) = insertImage InlineImage img
+toXml (Note bs) = do
+ fns <- footnotes `liftM` get
+ let n = 1 + length fns
+ let fn_id = footnoteID n
+ fn_desc <- cMapM blockToXml bs
+ modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns })
+ let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]"
+ return . list $ el "a" ( [ attr ("l","href") ('#':fn_id)
+ , uattr "type" "note" ]
+ , fn_ref )
+
+insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content]
+insertMath immode formula = do
+ htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get
+ case htmlMath of
+ WebTeX url -> do
+ let alt = [Code nullAttr formula]
+ let imgurl = url ++ urlEncode formula
+ let img = Image nullAttr alt (imgurl, "")
+ insertImage immode img
+ _ -> return [el "code" formula]
+
+insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
+insertImage immode (Image _ alt (url,ttl)) = do
+ images <- imagesToFetch `liftM` get
+ let n = 1 + length images
+ let fname = "image" ++ show n
+ modify (\s -> s { imagesToFetch = (fname, url) : images })
+ let ttlattr = case (immode, null ttl) of
+ (NormalImage, False) -> [ uattr "title" ttl ]
+ _ -> []
+ return . list $
+ el "image" $
+ [ attr ("l","href") ('#':fname)
+ , attr ("l","type") (show immode)
+ , uattr "alt" (cMap plain alt) ]
+ ++ ttlattr
+insertImage _ _ = error "unexpected inline instead of image"
+
+replaceImagesWithAlt :: [String] -> Content -> Content
+replaceImagesWithAlt missingHrefs body =
+ let cur = XC.fromContent body
+ cur' = replaceAll cur
+ in XC.toTree . XC.root $ cur'
+ where
+ --
+ replaceAll :: XC.Cursor -> XC.Cursor
+ replaceAll c =
+ let n = XC.current c
+ c' = if isImage n && isMissing n
+ then XC.modifyContent replaceNode c
+ else c
+ in case XC.nextDF c' of
+ (Just cnext) -> replaceAll cnext
+ Nothing -> c' -- end of document
+ --
+ isImage :: Content -> Bool
+ isImage (Elem e) = (elName e) == (uname "image")
+ isImage _ = False
+ --
+ isMissing (Elem img@(Element _ _ _ _)) =
+ let imgAttrs = elAttribs img
+ badAttrs = map (attr ("l","href")) missingHrefs
+ in any (`elem` imgAttrs) badAttrs
+ isMissing _ = False
+ --
+ replaceNode :: Content -> Content
+ replaceNode n@(Elem img@(Element _ _ _ _)) =
+ let attrs = elAttribs img
+ alt = getAttrVal attrs (uname "alt")
+ imtype = getAttrVal attrs (qname "l" "type")
+ in case (alt, imtype) of
+ (Just alt', Just imtype') ->
+ if imtype' == show NormalImage
+ then el "p" alt'
+ else txt alt'
+ (Just alt', Nothing) -> txt alt' -- no type attribute
+ _ -> n -- don't replace if alt text is not found
+ replaceNode n = n
+ --
+ getAttrVal :: [X.Attr] -> QName -> Maybe String
+ getAttrVal attrs name =
+ case filter ((name ==) . attrKey) attrs of
+ (a:_) -> Just (attrVal a)
+ _ -> Nothing
+
+
+-- | Wrap all inlines with an XML tag (given its unqualified name).
+wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
+wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
+
+-- " Create a singleton list.
+list :: a -> [a]
+list = (:[])
+
+-- | Convert an 'Inline' to plaintext.
+plain :: Inline -> String
+plain (Str s) = s
+plain (Emph ss) = concat (map plain ss)
+plain (Span _ ss) = concat (map plain ss)
+plain (Strong ss) = concat (map plain ss)
+plain (Strikeout ss) = concat (map plain ss)
+plain (Superscript ss) = concat (map plain ss)
+plain (Subscript ss) = concat (map plain ss)
+plain (SmallCaps ss) = concat (map plain ss)
+plain (Quoted _ ss) = concat (map plain ss)
+plain (Cite _ ss) = concat (map plain ss) -- FIXME
+plain (Code _ s) = s
+plain Space = " "
+plain SoftBreak = " "
+plain LineBreak = "\n"
+plain (Math _ s) = s
+plain (RawInline _ _) = ""
+plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"])
+plain (Image _ alt _) = concat (map plain alt)
+plain (Note _) = "" -- FIXME
+
+-- | Create an XML element.
+el :: (Node t)
+ => String -- ^ unqualified element name
+ -> t -- ^ node contents
+ -> Content -- ^ XML content
+el name cs = Elem $ unode name cs
+
+-- | Put empty lines around content
+spaceBeforeAfter :: [Content] -> [Content]
+spaceBeforeAfter cs =
+ let emptyline = el "empty-line" ()
+ in [emptyline] ++ cs ++ [emptyline]
+
+-- | Create a plain-text XML content.
+txt :: String -> Content
+txt s = Text $ CData CDataText s Nothing
+
+-- | Create an XML attribute with an unqualified name.
+uattr :: String -> String -> Text.XML.Light.Attr
+uattr name val = Attr (uname name) val
+
+-- | Create an XML attribute with a qualified name from given namespace.
+attr :: (String, String) -> String -> Text.XML.Light.Attr
+attr (ns, name) val = Attr (qname ns name) val
+
+-- | Unqualified name
+uname :: String -> QName
+uname name = QName name Nothing Nothing
+
+-- | Qualified name
+qname :: String -> String -> QName
+qname ns name = QName name Nothing (Just ns)
+
+-- | Abbreviation for 'concatMap'.
+cMap :: (a -> [b]) -> [a] -> [b]
+cMap = concatMap
+
+-- | Monadic equivalent of 'concatMap'.
+cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+cMapM f xs = concat `liftM` mapM f xs
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..99f8c5b42
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,1069 @@
+{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.HTML
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to HTML.
+-}
+module Text.Pandoc.Writers.HTML (
+ writeHtml4,
+ writeHtml4String,
+ writeHtml5,
+ writeHtml5String,
+ writeHtmlStringForEPUB,
+ writeS5,
+ writeSlidy,
+ writeSlideous,
+ writeDZSlides,
+ writeRevealJs
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Data.Monoid ((<>))
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting ( highlight, styleToCss,
+ formatHtmlInline, formatHtmlBlock )
+import Text.Pandoc.XML (fromEntities, escapeStringForXML)
+import Network.URI ( parseURIReference, URI(..), unEscapeString )
+import Network.HTTP ( urlEncode )
+import Numeric ( showHex )
+import Data.Char ( ord, toLower )
+import Data.List ( isPrefixOf, intersperse )
+import Data.String ( fromString )
+import Data.Maybe ( catMaybes, fromMaybe, isJust )
+import Control.Monad.State
+import Text.Blaze.Html hiding(contents)
+#if MIN_VERSION_blaze_markup(0,6,3)
+#else
+import Text.Blaze.Internal(preEscapedString)
+#endif
+#if MIN_VERSION_blaze_html(0,5,1)
+import qualified Text.Blaze.XHtml5 as H5
+#else
+import qualified Text.Blaze.Html5 as H5
+#endif
+import qualified Text.Blaze.XHtml1.Transitional as H
+import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
+import Text.Blaze.Html.Renderer.String (renderHtml)
+import Text.TeXMath
+import Text.XML.Light.Output
+import Text.XML.Light (unode, elChildren, unqual)
+import qualified Text.XML.Light as XML
+import System.FilePath (takeExtension)
+import Data.Aeson (Value)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+data WriterState = WriterState
+ { stNotes :: [Html] -- ^ List of notes
+ , stMath :: Bool -- ^ Math is used in document
+ , stQuotes :: Bool -- ^ <q> tag is used
+ , stHighlighting :: Bool -- ^ Syntax highlighting is used
+ , stSecNum :: [Int] -- ^ Number of current section
+ , stElement :: Bool -- ^ Processing an Element
+ , stHtml5 :: Bool -- ^ Use HTML5
+ , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub
+ , stSlideVariant :: HTMLSlideVariant
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
+ stHighlighting = False, stSecNum = [],
+ stElement = False, stHtml5 = False,
+ stEPUBVersion = Nothing,
+ stSlideVariant = NoSlides}
+
+-- Helpers to render HTML with the appropriate function.
+
+strToHtml :: String -> Html
+strToHtml ('\'':xs) = preEscapedString "\'" `mappend` strToHtml xs
+strToHtml xs@(_:_) = case break (=='\'') xs of
+ (_ ,[]) -> toHtml xs
+ (ys,zs) -> toHtml ys `mappend` strToHtml zs
+strToHtml [] = ""
+
+-- | Hard linebreak.
+nl :: WriterOptions -> Html
+nl opts = if writerWrapText opts == WrapNone
+ then mempty
+ else preEscapedString "\n"
+
+-- | Convert Pandoc document to Html 5 string.
+writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeHtml5String = writeHtmlString'
+ defaultWriterState{ stHtml5 = True }
+
+-- | Convert Pandoc document to Html 5 structure.
+writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
+writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True }
+
+-- | Convert Pandoc document to Html 4 string.
+writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeHtml4String = writeHtmlString'
+ defaultWriterState{ stHtml5 = False }
+
+-- | Convert Pandoc document to Html 4 structure.
+writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html
+writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False }
+
+-- | Convert Pandoc document to Html appropriate for an epub version.
+writeHtmlStringForEPUB :: PandocMonad m
+ => EPUBVersion -> WriterOptions -> Pandoc -> m String
+writeHtmlStringForEPUB version = writeHtmlString'
+ defaultWriterState{ stHtml5 = version == EPUB3,
+ stEPUBVersion = Just version }
+
+-- | Convert Pandoc document to Reveal JS HTML slide show.
+writeRevealJs :: PandocMonad m
+ => WriterOptions -> Pandoc -> m String
+writeRevealJs = writeHtmlSlideShow' RevealJsSlides
+
+-- | Convert Pandoc document to S5 HTML slide show.
+writeS5 :: PandocMonad m
+ => WriterOptions -> Pandoc -> m String
+writeS5 = writeHtmlSlideShow' S5Slides
+
+-- | Convert Pandoc document to Slidy HTML slide show.
+writeSlidy :: PandocMonad m
+ => WriterOptions -> Pandoc -> m String
+writeSlidy = writeHtmlSlideShow' SlidySlides
+
+-- | Convert Pandoc document to Slideous HTML slide show.
+writeSlideous :: PandocMonad m
+ => WriterOptions -> Pandoc -> m String
+writeSlideous = writeHtmlSlideShow' SlideousSlides
+
+-- | Convert Pandoc document to DZSlides HTML slide show.
+writeDZSlides :: PandocMonad m
+ => WriterOptions -> Pandoc -> m String
+writeDZSlides = writeHtmlSlideShow' DZSlides
+
+writeHtmlSlideShow' :: PandocMonad m
+ => HTMLSlideVariant -> WriterOptions -> Pandoc -> m String
+writeHtmlSlideShow' variant = writeHtmlString'
+ defaultWriterState{ stSlideVariant = variant
+ , stHtml5 = case variant of
+ RevealJsSlides -> True
+ S5Slides -> False
+ SlidySlides -> False
+ DZSlides -> True
+ SlideousSlides -> False
+ NoSlides -> False
+ }
+
+writeHtmlString' :: PandocMonad m
+ => WriterState -> WriterOptions -> Pandoc -> m String
+writeHtmlString' st opts d = do
+ (body, context) <- evalStateT (pandocToHtml opts d) st
+ return $ case writerTemplate opts of
+ Nothing -> renderHtml body
+ Just tpl -> renderTemplate' tpl $
+ defField "body" (renderHtml body) context
+
+writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
+writeHtml' st opts d = do
+ (body, context) <- evalStateT (pandocToHtml opts d) st
+ return $ case writerTemplate opts of
+ Nothing -> body
+ Just tpl -> renderTemplate' tpl $
+ defField "body" (renderHtml body) context
+
+-- result is (title, authors, date, toc, body, new variables)
+pandocToHtml :: PandocMonad m
+ => WriterOptions
+ -> Pandoc
+ -> StateT WriterState m (Html, Value)
+pandocToHtml opts (Pandoc meta blocks) = do
+ metadata <- metaToJSON opts
+ (fmap renderHtml . blockListToHtml opts)
+ (fmap renderHtml . inlineListToHtml opts)
+ meta
+ let stringifyHTML = escapeStringForXML . stringify
+ let authsMeta = map stringifyHTML $ docAuthors meta
+ let dateMeta = stringifyHTML $ docDate meta
+ let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
+ slideVariant <- gets stSlideVariant
+ let sects = hierarchicalize $
+ if slideVariant == NoSlides
+ then blocks
+ else prepSlides slideLevel blocks
+ toc <- if writerTableOfContents opts && slideVariant /= S5Slides
+ then tableOfContents opts sects
+ else return Nothing
+ blocks' <- liftM (mconcat . intersperse (nl opts)) $
+ mapM (elementToHtml slideLevel opts) sects
+ st <- get
+ notes <- footnoteSection opts (reverse (stNotes st))
+ let thebody = blocks' >> notes
+ let math = case writerHTMLMathMethod opts of
+ LaTeXMathML (Just url) ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
+ MathJax url ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ case slideVariant of
+ SlideousSlides ->
+ preEscapedString
+ "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
+ _ -> mempty
+ JsMath (Just url) ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
+ KaTeX js css ->
+ (H.script ! A.src (toValue js) $ mempty) <>
+ (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
+ (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
+ _ -> case lookup "mathml-script" (writerVariables opts) of
+ Just s | not (stHtml5 st) ->
+ H.script ! A.type_ "text/javascript"
+ $ preEscapedString
+ ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
+ | otherwise -> mempty
+ Nothing -> mempty
+ let context = (if stHighlighting st
+ then case writerHighlightStyle opts of
+ Just sty -> defField "highlighting-css"
+ (styleToCss sty)
+ Nothing -> id
+ else id) $
+ (if stMath st
+ then defField "math" (renderHtml math)
+ else id) $
+ defField "quotes" (stQuotes st) $
+ maybe id (defField "toc" . renderHtml) toc $
+ defField "author-meta" authsMeta $
+ maybe id (defField "date-meta") (normalizeDate dateMeta) $
+ defField "pagetitle" (stringifyHTML $ docTitle meta) $
+ defField "idprefix" (writerIdentifierPrefix opts) $
+ -- these should maybe be set in pandoc.hs
+ defField "slidy-url"
+ ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $
+ defField "slideous-url" ("slideous" :: String) $
+ defField "revealjs-url" ("reveal.js" :: String) $
+ defField "s5-url" ("s5/default" :: String) $
+ defField "html5" (stHtml5 st) $
+ metadata
+ return (thebody, context)
+
+-- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix
+prefixedId :: WriterOptions -> String -> Attribute
+prefixedId opts s =
+ case s of
+ "" -> mempty
+ _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s
+
+toList :: PandocMonad m
+ => (Html -> Html)
+ -> WriterOptions
+ -> [Html]
+ -> StateT WriterState m Html
+toList listop opts items = do
+ slideVariant <- gets stSlideVariant
+ return $
+ if (writerIncremental opts)
+ then if (slideVariant /= RevealJsSlides)
+ then (listop $ mconcat items) ! A.class_ "incremental"
+ else listop $ mconcat $ map (! A.class_ "fragment") items
+ else listop $ mconcat items
+
+unordList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
+unordList opts = toList H.ul opts . toListItems opts
+
+ordList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
+ordList opts = toList H.ol opts . toListItems opts
+
+defList :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
+defList opts items = toList H.dl opts (items ++ [nl opts])
+
+-- | Construct table of contents from list of elements.
+tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html)
+tableOfContents _ [] = return Nothing
+tableOfContents opts sects = do
+ contents <- mapM (elementToListItem opts) sects
+ let tocList = catMaybes contents
+ if null tocList
+ then return Nothing
+ else Just <$> unordList opts tocList
+
+-- | Convert section number to string
+showSecNum :: [Int] -> String
+showSecNum = concat . intersperse "." . map show
+
+-- | Converts an Element to a list item for a table of contents,
+-- retrieving the appropriate identifier from state.
+elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html)
+-- Don't include the empty headers created in slide shows
+-- shows when an hrule is used to separate slides without a new title:
+elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing
+elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs)
+ | lev <= writerTOCDepth opts = do
+ let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
+ let sectnum = if writerNumberSections opts && not (null num) &&
+ "unnumbered" `notElem` classes
+ then (H.span ! A.class_ "toc-section-number"
+ $ toHtml $ showSecNum num') >> preEscapedString " "
+ else mempty
+ txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText
+ subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
+ subList <- if null subHeads
+ then return mempty
+ else unordList opts subHeads
+ -- in reveal.js, we need #/apples, not #apples:
+ slideVariant <- gets stSlideVariant
+ let revealSlash = ['/' | slideVariant== RevealJsSlides]
+ return $ Just
+ $ if null id'
+ then (H.a $ toHtml txt) >> subList
+ else (H.a ! A.href (toValue $ "#" ++ revealSlash ++
+ writerIdentifierPrefix opts ++ id')
+ $ toHtml txt) >> subList
+elementToListItem _ _ = return Nothing
+
+-- | Convert an Element to Html.
+elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html
+elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block
+elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do
+ slideVariant <- gets stSlideVariant
+ let slide = slideVariant /= NoSlides && level <= slideLevel
+ let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
+ modify $ \st -> st{stSecNum = num'} -- update section number
+ html5 <- gets stHtml5
+ let titleSlide = slide && level < slideLevel
+ header' <- if title' == [Str "\0"] -- marker for hrule
+ then return mempty
+ else do
+ modify (\st -> st{ stElement = True})
+ res <- blockToHtml opts
+ (Header level (id',classes,keyvals) title')
+ modify (\st -> st{ stElement = False})
+ return res
+
+ let isSec (Sec _ _ _ _ _) = True
+ isSec (Blk _) = False
+ let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
+ isPause _ = False
+ let fragmentClass = case slideVariant of
+ RevealJsSlides -> "fragment"
+ _ -> "incremental"
+ let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\""
+ ++ fragmentClass ++ "\">")) :
+ (xs ++ [Blk (RawBlock (Format "html") "</div>")])
+ innerContents <- mapM (elementToHtml slideLevel opts)
+ $ if titleSlide
+ -- title slides have no content of their own
+ then filter isSec elements
+ else case splitBy isPause elements of
+ [] -> []
+ (x:xs) -> x ++ concatMap inDiv xs
+ let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
+ let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
+ ["section" | (slide || writerSectionDivs opts) &&
+ not html5 ] ++
+ ["level" ++ show level | slide || writerSectionDivs opts ]
+ ++ classes
+ let secttag = if html5
+ then H5.section
+ else H.div
+ let attr = (id',classes',keyvals)
+ return $ if titleSlide
+ then (if slideVariant == RevealJsSlides
+ then H5.section
+ else id) $ mconcat $
+ (addAttrs opts attr $ secttag $ header') : innerContents
+ else if writerSectionDivs opts || slide
+ then addAttrs opts attr
+ $ secttag $ inNl $ header' : innerContents
+ else mconcat $ intersperse (nl opts)
+ $ addAttrs opts attr header' : innerContents
+
+-- | Convert list of Note blocks to a footnote <div>.
+-- Assumes notes are sorted.
+footnoteSection :: PandocMonad m
+ => WriterOptions -> [Html] -> StateT WriterState m Html
+footnoteSection opts notes = do
+ html5 <- gets stHtml5
+ slideVariant <- gets stSlideVariant
+ let hrtag = if html5 then H5.hr else H.hr
+ let container x = if html5
+ then H5.section ! A.class_ "footnotes" $ x
+ else if slideVariant /= NoSlides
+ then H.div ! A.class_ "footnotes slide" $ x
+ else H.div ! A.class_ "footnotes" $ x
+ return $
+ if null notes
+ then mempty
+ else nl opts >> (container
+ $ nl opts >> hrtag >> nl opts >>
+ H.ol (mconcat notes >> nl opts) >> nl opts)
+
+-- | Parse a mailto link; return Just (name, domain) or Nothing.
+parseMailto :: String -> Maybe (String, String)
+parseMailto s = do
+ case break (==':') s of
+ (xs,':':addr) | map toLower xs == "mailto" -> do
+ let (name', rest) = span (/='@') addr
+ let domain = drop 1 rest
+ return (name', domain)
+ _ -> fail "not a mailto: URL"
+
+-- | Obfuscate a "mailto:" link.
+obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html
+obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation =
+ return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt
+obfuscateLink opts attr (renderHtml -> txt) s =
+ let meth = writerEmailObfuscation opts
+ s' = map toLower (take 7 s) ++ drop 7 s
+ in case parseMailto s' of
+ (Just (name', domain)) ->
+ let domain' = substitute "." " dot " domain
+ at' = obfuscateChar '@'
+ (linkText, altText) =
+ if txt == drop 7 s' -- autolink
+ then ("e", name' ++ " at " ++ domain')
+ else ("'" ++ obfuscateString txt ++ "'",
+ txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")")
+ in case meth of
+ ReferenceObfuscation ->
+ -- need to use preEscapedString or &'s are escaped to &amp; in URL
+ return $
+ preEscapedString $ "<a href=\"" ++ (obfuscateString s')
+ ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>"
+ JavascriptObfuscation ->
+ return $
+ (H.script ! A.type_ "text/javascript" $
+ preEscapedString ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ obfuscateString name' ++ "';e=n+a+h;\n" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >>
+ H.noscript (preEscapedString $ obfuscateString altText)
+ _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth
+ _ -> return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char =
+ let num = ord char
+ numstr = if even num then show num else "x" ++ showHex num ""
+ in "&#" ++ numstr ++ ";"
+
+-- | Obfuscate string using entities.
+obfuscateString :: String -> String
+obfuscateString = concatMap obfuscateChar . fromEntities
+
+addAttrs :: WriterOptions -> Attr -> Html -> Html
+addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr)
+
+toAttrs :: [(String, String)] -> [Attribute]
+toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs
+
+attrsToHtml :: WriterOptions -> Attr -> [Attribute]
+attrsToHtml opts (id',classes',keyvals) =
+ [prefixedId opts id' | not (null id')] ++
+ [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals
+
+imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute]
+imgAttrsToHtml opts attr =
+ attrsToHtml opts (ident,cls,kvs') ++
+ toAttrs (dimensionsToAttrList opts attr)
+ where
+ (ident,cls,kvs) = attr
+ kvs' = filter isNotDim kvs
+ isNotDim ("width", _) = False
+ isNotDim ("height", _) = False
+ isNotDim _ = True
+
+dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)]
+dimensionsToAttrList opts attr = (go Width) ++ (go Height)
+ where
+ go dir = case (dimension dir attr) of
+ (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))]
+ (Just dim) -> [(show dir, showInPixel opts dim)]
+ _ -> []
+
+
+imageExts :: [String]
+imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
+ "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm",
+ "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff",
+ "wbmp", "xbm", "xpm", "xwd" ]
+
+treatAsImage :: FilePath -> Bool
+treatAsImage fp =
+ let path = case uriPath `fmap` parseURIReference fp of
+ Nothing -> fp
+ Just up -> up
+ ext = map toLower $ drop 1 $ takeExtension path
+ in null ext || ext `elem` imageExts
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
+blockToHtml _ Null = return mempty
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+-- title beginning with fig: indicates that the image is a figure
+blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do
+ img <- inlineToHtml opts (Image attr txt (s,tit))
+ html5 <- gets stHtml5
+ let tocapt = if html5
+ then H5.figcaption
+ else H.p ! A.class_ "caption"
+ capt <- if null txt
+ then return mempty
+ else tocapt `fmap` inlineListToHtml opts txt
+ return $ if html5
+ then H5.figure $ mconcat
+ [nl opts, img, capt, nl opts]
+ else H.div ! A.class_ "figure" $ mconcat
+ [nl opts, img, nl opts, capt, nl opts]
+blockToHtml opts (Para lst)
+ | isEmptyRaw lst = return mempty
+ | otherwise = do
+ contents <- inlineListToHtml opts lst
+ return $ H.p contents
+ where
+ isEmptyRaw [RawInline f _] = f /= (Format "html")
+ isEmptyRaw _ = False
+blockToHtml opts (LineBlock lns) =
+ if writerWrapText opts == WrapNone
+ then blockToHtml opts $ linesToPara lns
+ else do
+ let lf = preEscapedString "\n"
+ htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns
+ return $ H.div ! A.style "white-space: pre-line;" $ htmlLines
+blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do
+ html5 <- gets stHtml5
+ let speakerNotes = "notes" `elem` classes
+ -- we don't want incremental output inside speaker notes, see #1394
+ let opts' = if speakerNotes then opts{ writerIncremental = False } else opts
+ contents <- blockListToHtml opts' bs
+ let contents' = nl opts >> contents >> nl opts
+ let (divtag, classes') = if html5 && "section" `elem` classes
+ then (H5.section, filter (/= "section") classes)
+ else (H.div, classes)
+ slideVariant <- gets stSlideVariant
+ return $
+ if speakerNotes
+ then case slideVariant of
+ RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
+ DZSlides -> (addAttrs opts' attr $ H5.div $ contents')
+ ! (H5.customAttribute "role" "note")
+ NoSlides -> addAttrs opts' attr $ H.div $ contents'
+ _ -> mempty
+ else addAttrs opts (ident, classes', kvs) $ divtag $ contents'
+blockToHtml opts (RawBlock f str)
+ | f == Format "html" = return $ preEscapedString str
+ | (f == Format "latex" || f == Format "tex") &&
+ allowsMathEnvironments (writerHTMLMathMethod opts) &&
+ isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str]
+ | otherwise = do
+ report $ BlockNotRendered (RawBlock f str)
+ return mempty
+blockToHtml _ (HorizontalRule) = do
+ html5 <- gets stHtml5
+ return $ if html5 then H5.hr else H.hr
+blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
+ let tolhs = isEnabled Ext_literate_haskell opts &&
+ any (\c -> map toLower c == "haskell") classes &&
+ any (\c -> map toLower c == "literate") classes
+ classes' = if tolhs
+ then map (\c -> if map toLower c == "haskell"
+ then "literatehaskell"
+ else c) classes
+ else classes
+ adjCode = if tolhs
+ then unlines . map ("> " ++) . lines $ rawCode
+ else rawCode
+ hlCode = if isJust (writerHighlightStyle opts)
+ then highlight formatHtmlBlock
+ (id',classes',keyvals) adjCode
+ else Nothing
+ case hlCode of
+ Nothing -> return $ addAttrs opts (id',classes,keyvals)
+ $ H.pre $ H.code $ toHtml adjCode
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (addAttrs opts (id',[],keyvals) h)
+blockToHtml opts (BlockQuote blocks) = do
+ -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ slideVariant <- gets stSlideVariant
+ if slideVariant /= NoSlides
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList attribs lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (OrderedList attribs lst)
+ [DefinitionList lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (DefinitionList lst)
+ _ -> do contents <- blockListToHtml opts blocks
+ return $ H.blockquote
+ $ nl opts >> contents >> nl opts
+ else do
+ contents <- blockListToHtml opts blocks
+ return $ H.blockquote $ nl opts >> contents >> nl opts
+blockToHtml opts (Header level attr@(_,classes,_) lst) = do
+ contents <- inlineListToHtml opts lst
+ secnum <- liftM stSecNum get
+ let contents' = if writerNumberSections opts && not (null secnum)
+ && "unnumbered" `notElem` classes
+ then (H.span ! A.class_ "header-section-number" $ toHtml
+ $ showSecNum secnum) >> strToHtml " " >> contents
+ else contents
+ inElement <- gets stElement
+ return $ (if inElement then id else addAttrs opts attr)
+ $ case level of
+ 1 -> H.h1 contents'
+ 2 -> H.h2 contents'
+ 3 -> H.h3 contents'
+ 4 -> H.h4 contents'
+ 5 -> H.h5 contents'
+ 6 -> H.h6 contents'
+ _ -> H.p contents'
+blockToHtml opts (BulletList lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ unordList opts contents
+blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ html5 <- gets stHtml5
+ let numstyle' = case numstyle of
+ Example -> "decimal"
+ _ -> camelCaseToHyphenated $ show numstyle
+ let attribs = (if startnum /= 1
+ then [A.start $ toValue startnum]
+ else []) ++
+ (if numstyle == Example
+ then [A.class_ "example"]
+ else []) ++
+ (if numstyle /= DefaultStyle
+ then if html5
+ then [A.type_ $
+ case numstyle of
+ Decimal -> "1"
+ LowerAlpha -> "a"
+ UpperAlpha -> "A"
+ LowerRoman -> "i"
+ UpperRoman -> "I"
+ _ -> "1"]
+ else [A.style $ toValue $ "list-style-type: " ++
+ numstyle']
+ else [])
+ l <- ordList opts contents
+ return $ foldl (!) l attribs
+blockToHtml opts (DefinitionList lst) = do
+ contents <- mapM (\(term, defs) ->
+ do term' <- if null term
+ then return mempty
+ else liftM H.dt $ inlineListToHtml opts term
+ defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
+ blockListToHtml opts) defs
+ return $ mconcat $ nl opts : term' : nl opts :
+ intersperse (nl opts) defs') lst
+ defList opts contents
+blockToHtml opts (Table capt aligns widths headers rows') = do
+ captionDoc <- if null capt
+ then return mempty
+ else do
+ cs <- inlineListToHtml opts capt
+ return $ H.caption cs >> nl opts
+ html5 <- gets stHtml5
+ let percent w = show (truncate (100*w) :: Integer) ++ "%"
+ let coltags = if all (== 0.0) widths
+ then mempty
+ else do
+ H.colgroup $ do
+ nl opts
+ mapM_ (\w -> do
+ if html5
+ then H.col ! A.style (toValue $ "width: " ++
+ percent w)
+ else H.col ! A.width (toValue $ percent w)
+ nl opts) widths
+ nl opts
+ head' <- if all null headers
+ then return mempty
+ else do
+ contents <- tableRowToHtml opts aligns 0 headers
+ return $ H.thead (nl opts >> contents) >> nl opts
+ body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $
+ zipWithM (tableRowToHtml opts aligns) [1..] rows'
+ let tbl = H.table $
+ nl opts >> captionDoc >> coltags >> head' >> body' >> nl opts
+ let totalWidth = sum widths
+ -- When widths of columns are < 100%, we need to set width for the whole
+ -- table, or some browsers give us skinny columns with lots of space between:
+ return $ if totalWidth == 0 || totalWidth == 1
+ then tbl
+ else tbl ! A.style (toValue $ "width:" ++
+ show (round (totalWidth * 100) :: Int) ++ "%;")
+
+tableRowToHtml :: PandocMonad m
+ => WriterOptions
+ -> [Alignment]
+ -> Int
+ -> [[Block]]
+ -> StateT WriterState m Html
+tableRowToHtml opts aligns rownum cols' = do
+ let mkcell = if rownum == 0 then H.th else H.td
+ let rowclass = case rownum of
+ 0 -> "header"
+ x | x `rem` 2 == 1 -> "odd"
+ _ -> "even"
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToHtml opts mkcell alignment item)
+ aligns cols'
+ return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'')
+ >> nl opts
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> ""
+
+tableItemToHtml :: PandocMonad m
+ => WriterOptions
+ -> (Html -> Html)
+ -> Alignment
+ -> [Block]
+ -> StateT WriterState m Html
+tableItemToHtml opts tag' align' item = do
+ contents <- blockListToHtml opts item
+ html5 <- gets stHtml5
+ let alignStr = alignmentToString align'
+ let attribs = if html5
+ then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
+ else A.align (toValue alignStr)
+ let tag'' = if null alignStr
+ then tag'
+ else tag' ! attribs
+ return $ (tag'' $ contents) >> nl opts
+
+toListItems :: WriterOptions -> [Html] -> [Html]
+toListItems opts items = map (toListItem opts) items ++ [nl opts]
+
+toListItem :: WriterOptions -> Html -> Html
+toListItem opts item = nl opts >> H.li item
+
+blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html
+blockListToHtml opts lst =
+ fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html
+inlineListToHtml opts lst =
+ mapM (inlineToHtml opts) lst >>= return . mconcat
+
+-- | Annotates a MathML expression with the tex source
+annotateMML :: XML.Element -> String -> XML.Element
+annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)])
+ where
+ cs = case elChildren e of
+ [] -> unode "mrow" ()
+ [x] -> x
+ xs -> unode "mrow" xs
+ math childs = XML.Element q as [XML.Elem childs] l
+ where
+ (XML.Element q as _ l) = e
+ annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"]
+
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: PandocMonad m
+ => WriterOptions -> Inline -> StateT WriterState m Html
+inlineToHtml opts inline = do
+ html5 <- gets stHtml5
+ case inline of
+ (Str str) -> return $ strToHtml str
+ (Space) -> return $ strToHtml " "
+ (SoftBreak) -> return $ case writerWrapText opts of
+ WrapNone -> preEscapedString " "
+ WrapAuto -> preEscapedString " "
+ WrapPreserve -> preEscapedString "\n"
+ (LineBreak) -> return $ (if html5 then H5.br else H.br)
+ <> strToHtml "\n"
+ (Span (id',classes,kvs) ils)
+ -> inlineListToHtml opts ils >>=
+ return . addAttrs opts attr' . H.span
+ where attr' = (id',classes',kvs')
+ classes' = filter (`notElem` ["csl-no-emph",
+ "csl-no-strong",
+ "csl-no-smallcaps"]) classes
+ kvs' = if null styles
+ then kvs
+ else (("style", concat styles) : kvs)
+ styles = ["font-style:normal;"
+ | "csl-no-emph" `elem` classes]
+ ++ ["font-weight:normal;"
+ | "csl-no-strong" `elem` classes]
+ ++ ["font-variant:normal;"
+ | "csl-no-smallcaps" `elem` classes]
+ (Emph lst) -> inlineListToHtml opts lst >>= return . H.em
+ (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong
+ (Code attr str) -> case hlCode of
+ Nothing -> return
+ $ addAttrs opts attr
+ $ H.code $ strToHtml str
+ Just h -> do
+ modify $ \st -> st{ stHighlighting = True }
+ return $ addAttrs opts (id',[],keyvals) h
+ where (id',_,keyvals) = attr
+ hlCode = if isJust (writerHighlightStyle opts)
+ then highlight formatHtmlInline
+ attr str
+ else Nothing
+ (Strikeout lst) -> inlineListToHtml opts lst >>=
+ return . H.del
+ (SmallCaps lst) -> inlineListToHtml opts lst >>=
+ return . (H.span ! A.style "font-variant: small-caps;")
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub
+ (Quoted quoteType lst) ->
+ let (leftQuote, rightQuote) = case quoteType of
+ SingleQuote -> (strToHtml "‘",
+ strToHtml "’")
+ DoubleQuote -> (strToHtml "“",
+ strToHtml "”")
+ in if writerHtmlQTags opts
+ then do
+ modify $ \st -> st{ stQuotes = True }
+ H.q `fmap` inlineListToHtml opts lst
+ else (\x -> leftQuote >> x >> rightQuote)
+ `fmap` inlineListToHtml opts lst
+ (Math t str) -> do
+ modify (\st -> st {stMath = True})
+ let mathClass = toValue $ ("math " :: String) ++
+ if t == InlineMath then "inline" else "display"
+ case writerHTMLMathMethod opts of
+ LaTeXMathML _ ->
+ -- putting LaTeXMathML in container with class "LaTeX" prevents
+ -- non-math elements on the page from being treated as math by
+ -- the javascript
+ return $ H.span ! A.class_ "LaTeX" $
+ case t of
+ InlineMath -> toHtml ("$" ++ str ++ "$")
+ DisplayMath -> toHtml ("$$" ++ str ++ "$$")
+ JsMath _ -> do
+ let m = preEscapedString str
+ return $ case t of
+ InlineMath -> H.span ! A.class_ mathClass $ m
+ DisplayMath -> H.div ! A.class_ mathClass $ m
+ WebTeX url -> do
+ let imtag = if html5 then H5.img else H.img
+ let m = imtag ! A.style "vertical-align:middle"
+ ! A.src (toValue $ url ++ urlEncode str)
+ ! A.alt (toValue str)
+ ! A.title (toValue str)
+ let brtag = if html5 then H5.br else H.br
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> brtag >> m >> brtag
+ GladTeX ->
+ return $ case t of
+ InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
+ DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
+ MathML -> do
+ let conf = useShortEmptyTags (const False)
+ defaultConfigPP
+ res <- lift $ convertMath writeMathML t str
+ case res of
+ Right r -> return $ preEscapedString $
+ ppcElement conf (annotateMML r str)
+ Left il -> (H.span ! A.class_ mathClass) <$>
+ inlineToHtml opts il
+ MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $
+ case t of
+ InlineMath -> "\\(" ++ str ++ "\\)"
+ DisplayMath -> "\\[" ++ str ++ "\\]"
+ KaTeX _ _ -> return $ H.span ! A.class_ mathClass $
+ toHtml (case t of
+ InlineMath -> str
+ DisplayMath -> "\\displaystyle " ++ str)
+ PlainMath -> do
+ x <- lift (texMathToInlines t str) >>= inlineListToHtml opts
+ let m = H.span ! A.class_ mathClass $ x
+ let brtag = if html5 then H5.br else H.br
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> brtag >> m >> brtag
+ (RawInline f str)
+ | f == Format "html" -> return $ preEscapedString str
+ | otherwise -> do
+ report $ InlineNotRendered inline
+ return mempty
+ (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do
+ linkText <- inlineListToHtml opts txt
+ lift $ obfuscateLink opts attr linkText s
+ (Link attr txt (s,tit)) -> do
+ linkText <- inlineListToHtml opts txt
+ slideVariant <- gets stSlideVariant
+ let s' = case s of
+ '#':xs -> let prefix = if slideVariant == RevealJsSlides
+ then "/"
+ else writerIdentifierPrefix opts
+ in '#' : prefix ++ xs
+ _ -> s
+ let link = H.a ! A.href (toValue s') $ linkText
+ let link' = if txt == [Str (unEscapeString s)]
+ then link ! A.class_ "uri"
+ else link
+ let link'' = addAttrs opts attr link'
+ return $ if null tit
+ then link''
+ else link'' ! A.title (toValue tit)
+ (Image attr txt (s,tit)) | treatAsImage s -> do
+ let alternate' = stringify txt
+ let attributes = [A.src $ toValue s] ++
+ [A.title $ toValue tit | not (null tit)] ++
+ [A.alt $ toValue alternate' | not (null txt)] ++
+ imgAttrsToHtml opts attr
+ let tag = if html5 then H5.img else H.img
+ return $ foldl (!) tag attributes
+ -- note: null title included, as in Markdown.pl
+ (Image attr _ (s,tit)) -> do
+ let attributes = [A.src $ toValue s] ++
+ [A.title $ toValue tit | not (null tit)] ++
+ imgAttrsToHtml opts attr
+ return $ foldl (!) H5.embed attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
+ notes <- gets stNotes
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ epubVersion <- gets stEPUBVersion
+ -- push contents onto front of notes
+ modify $ \st -> st {stNotes = (htmlContents:notes)}
+ slideVariant <- gets stSlideVariant
+ let revealSlash = ['/' | slideVariant == RevealJsSlides]
+ let link = H.a ! A.href (toValue $ "#" ++
+ revealSlash ++
+ writerIdentifierPrefix opts ++ "fn" ++ ref)
+ ! A.class_ "footnoteRef"
+ ! prefixedId opts ("fnref" ++ ref)
+ $ (if isJust epubVersion
+ then id
+ else H.sup)
+ $ toHtml ref
+ return $ case epubVersion of
+ Just EPUB3 -> link ! customAttribute "epub:type" "noteref"
+ _ -> link
+ (Cite cits il)-> do contents <- inlineListToHtml opts il
+ let citationIds = unwords $ map citationId cits
+ let result = H.span ! A.class_ "citation" $ contents
+ return $ if html5
+ then result ! customAttribute "data-cites" (toValue citationIds)
+ else result
+
+blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html
+blockListToNote opts ref blocks =
+ -- If last block is Para or Plain, include the backlink at the end of
+ -- that block. Otherwise, insert a new Plain block with the backlink.
+ let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])]
+ blocks' = if null blocks
+ then []
+ else let lastBlock = last blocks
+ otherBlocks = init blocks
+ in case lastBlock of
+ (Para lst) -> otherBlocks ++
+ [Para (lst ++ backlink)]
+ (Plain lst) -> otherBlocks ++
+ [Plain (lst ++ backlink)]
+ _ -> otherBlocks ++ [lastBlock,
+ Plain backlink]
+ in do contents <- blockListToHtml opts blocks'
+ let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents
+ epubVersion <- gets stEPUBVersion
+ let noteItem' = case epubVersion of
+ Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote"
+ _ -> noteItem
+ return $ nl opts >> noteItem'
+
+-- Javascript snippet to render all KaTeX elements
+renderKaTeX :: String
+renderKaTeX = unlines [
+ "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");"
+ , "for (var i=0; i < mathElements.length; i++)"
+ , "{"
+ , " var texText = mathElements[i].firstChild"
+ , " katex.render(texText.data, mathElements[i])"
+ , "}}"
+ ]
+
+isMathEnvironment :: String -> Bool
+isMathEnvironment s = "\\begin{" `isPrefixOf` s &&
+ envName `elem` mathmlenvs
+ where envName = takeWhile (/= '}') (drop 7 s)
+ mathmlenvs = [ "align"
+ , "align*"
+ , "alignat"
+ , "alignat*"
+ , "aligned"
+ , "alignedat"
+ , "array"
+ , "Bmatrix"
+ , "bmatrix"
+ , "cases"
+ , "CD"
+ , "eqnarray"
+ , "eqnarray*"
+ , "equation"
+ , "equation*"
+ , "gather"
+ , "gather*"
+ , "gathered"
+ , "matrix"
+ , "multline"
+ , "multline*"
+ , "pmatrix"
+ , "smallmatrix"
+ , "split"
+ , "subarray"
+ , "Vmatrix"
+ , "vmatrix" ]
+
+allowsMathEnvironments :: HTMLMathMethod -> Bool
+allowsMathEnvironments (MathJax _) = True
+allowsMathEnvironments (MathML) = True
+allowsMathEnvironments (WebTeX _) = True
+allowsMathEnvironments _ = False
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
new file mode 100644
index 000000000..945e4a0f1
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -0,0 +1,370 @@
+{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Haddock
+ Copyright : Copyright (C) 2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to haddock markup.
+
+Haddock: <http://www.haskell.org/haddock/doc/html/>
+-}
+module Text.Pandoc.Writers.Haddock (writeHaddock) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Data.List ( intersperse, transpose )
+import Text.Pandoc.Pretty
+import Control.Monad.State
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Network.URI (isURI)
+import Data.Default
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+type Notes = [[Block]]
+data WriterState = WriterState { stNotes :: Notes }
+instance Default WriterState
+ where def = WriterState{ stNotes = [] }
+
+-- | Convert Pandoc to Haddock.
+writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeHaddock opts document =
+ evalStateT (pandocToHaddock opts{
+ writerWrapText = writerWrapText opts } document) def
+
+-- | Return haddock representation of document.
+pandocToHaddock :: PandocMonad m
+ => WriterOptions -> Pandoc -> StateT WriterState m String
+pandocToHaddock opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ body <- blockListToHaddock opts blocks
+ st <- get
+ notes' <- notesToHaddock opts (reverse $ stNotes st)
+ let render' :: Doc -> String
+ render' = render colwidth
+ let main = render' $ body <>
+ (if isEmpty notes' then empty else blankline <> notes')
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToHaddock opts)
+ (fmap (render colwidth) . inlineListToHaddock opts)
+ meta
+ let context = defField "body" main
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Return haddock representation of notes.
+notesToHaddock :: PandocMonad m
+ => WriterOptions -> [[Block]] -> StateT WriterState m Doc
+notesToHaddock opts notes =
+ if null notes
+ then return empty
+ else do
+ contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes
+ return $ text "#notes#" <> blankline <> contents
+
+-- | Escape special characters for Haddock.
+escapeString :: String -> String
+escapeString = escapeStringUsing haddockEscapes
+ where haddockEscapes = backslashEscapes "\\/'`\"@<"
+
+-- | Convert Pandoc block element to haddock.
+blockToHaddock :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> StateT WriterState m Doc
+blockToHaddock _ Null = return empty
+blockToHaddock opts (Div _ ils) = do
+ contents <- blockListToHaddock opts ils
+ return $ contents <> blankline
+blockToHaddock opts (Plain inlines) = do
+ contents <- inlineListToHaddock opts inlines
+ return $ contents <> cr
+-- title beginning with fig: indicates figure
+blockToHaddock opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
+ blockToHaddock opts (Para [Image attr alt (src,tit)])
+blockToHaddock opts (Para inlines) =
+ -- TODO: if it contains linebreaks, we need to use a @...@ block
+ (<> blankline) `fmap` blockToHaddock opts (Plain inlines)
+blockToHaddock opts (LineBlock lns) =
+ blockToHaddock opts $ linesToPara lns
+blockToHaddock _ b@(RawBlock f str)
+ | f == "haddock" = do
+ return $ text str <> text "\n"
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToHaddock opts HorizontalRule =
+ return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline
+blockToHaddock opts (Header level (ident,_,_) inlines) = do
+ contents <- inlineListToHaddock opts inlines
+ let attr' = if null ident
+ then empty
+ else cr <> text "#" <> text ident <> text "#"
+ return $ nowrap (text (replicate level '=') <> space <> contents)
+ <> attr' <> blankline
+blockToHaddock _ (CodeBlock (_,_,_) str) =
+ return $ prefixed "> " (text str) <> blankline
+-- Nothing in haddock corresponds to block quotes:
+blockToHaddock opts (BlockQuote blocks) =
+ blockListToHaddock opts blocks
+-- Haddock doesn't have tables. Use haddock tables in code.
+blockToHaddock opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToHaddock opts caption
+ let caption'' = if null caption
+ then empty
+ else blankline <> caption' <> blankline
+ rawHeaders <- mapM (blockListToHaddock opts) headers
+ rawRows <- mapM (mapM (blockListToHaddock opts)) rows
+ let isSimple = all (==0) widths
+ let isPlainBlock (Plain _) = True
+ isPlainBlock _ = False
+ let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
+ (nst,tbl) <- case True of
+ _ | isSimple -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | not hasBlocks -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | otherwise -> fmap (id,) $
+ gridTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline
+blockToHaddock opts (BulletList items) = do
+ contents <- mapM (bulletListItemToHaddock opts) items
+ return $ cat contents <> blankline
+blockToHaddock opts (OrderedList (start,_,delim) items) = do
+ let attribs = (start, Decimal, delim)
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $
+ zip markers' items
+ return $ cat contents <> blankline
+blockToHaddock opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToHaddock opts) items
+ return $ cat contents <> blankline
+
+pandocTable :: PandocMonad m
+ => WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> StateT WriterState m Doc
+pandocTable opts headless aligns widths rawHeaders rawRows = do
+ let isSimple = all (==0) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> lblock
+ AlignCenter -> cblock
+ AlignRight -> rblock
+ AlignDefault -> lblock
+ let numChars = maximum . map offset
+ let widthsInChars = if isSimple
+ then map ((+2) . numChars)
+ $ transpose (rawHeaders : rawRows)
+ else map
+ (floor . (fromIntegral (writerColumns opts) *))
+ widths
+ let makeRow = hcat . intersperse (lblock 1 (text " ")) .
+ (zipWith3 alignHeader aligns widthsInChars)
+ let rows' = map makeRow rawRows
+ let head' = makeRow rawHeaders
+ let maxRowHeight = maximum $ map height (head':rows')
+ let underline = cat $ intersperse (text " ") $
+ map (\width -> text (replicate width '-')) widthsInChars
+ let border = if maxRowHeight > 1
+ then text (replicate (sum widthsInChars +
+ length widthsInChars - 1) '-')
+ else if headless
+ then underline
+ else empty
+ let head'' = if headless
+ then empty
+ else border <> cr <> head'
+ let body = if maxRowHeight > 1
+ then vsep rows'
+ else vcat rows'
+ let bottom = if headless
+ then underline
+ else border
+ return $ head'' $$ underline $$ body $$ bottom
+
+gridTable :: PandocMonad m
+ => WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> StateT WriterState m Doc
+gridTable opts headless _aligns widths headers' rawRows = do
+ let numcols = length headers'
+ let widths' = if all (==0) widths
+ then replicate numcols (1.0 / fromIntegral numcols)
+ else widths
+ let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths'
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (map height blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = chomp $ hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow headers'
+ let rows' = map (makeRow . map chomp) rawRows
+ let border ch = char '+' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '+'
+ let body = vcat $ intersperse (border '-') rows'
+ let head'' = if headless
+ then empty
+ else head' $$ border '='
+ return $ border '-' $$ head'' $$ body $$ border '-'
+
+-- | Convert bullet list item (list of blocks) to haddock
+bulletListItemToHaddock :: PandocMonad m
+ => WriterOptions -> [Block] -> StateT WriterState m Doc
+bulletListItemToHaddock opts items = do
+ contents <- blockListToHaddock opts items
+ let sps = replicate (writerTabStop opts - 2) ' '
+ let start = text ('-' : ' ' : sps)
+ -- remove trailing blank line if it is a tight list
+ let contents' = case reverse items of
+ (BulletList xs:_) | isTightList xs ->
+ chomp contents <> cr
+ (OrderedList _ xs:_) | isTightList xs ->
+ chomp contents <> cr
+ _ -> contents
+ return $ hang (writerTabStop opts) start $ contents' <> cr
+
+-- | Convert ordered list item (a list of blocks) to haddock
+orderedListItemToHaddock :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> StateT WriterState m Doc
+orderedListItemToHaddock opts marker items = do
+ contents <- blockListToHaddock opts items
+ let sps = case length marker - writerTabStop opts of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ let start = text marker <> sps
+ return $ hang (writerTabStop opts) start $ contents <> cr
+
+-- | Convert definition list item (label, list of blocks) to haddock
+definitionListItemToHaddock :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> StateT WriterState m Doc
+definitionListItemToHaddock opts (label, defs) = do
+ labelText <- inlineListToHaddock opts label
+ defs' <- mapM (mapM (blockToHaddock opts)) defs
+ let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs'
+ return $ nowrap (brackets labelText) <> cr <> contents <> cr
+
+-- | Convert list of Pandoc block elements to haddock
+blockListToHaddock :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
+blockListToHaddock opts blocks =
+ mapM (blockToHaddock opts) blocks >>= return . cat
+
+-- | Convert list of Pandoc inline elements to haddock.
+inlineListToHaddock :: PandocMonad m
+ => WriterOptions -> [Inline] -> StateT WriterState m Doc
+inlineListToHaddock opts lst =
+ mapM (inlineToHaddock opts) lst >>= return . cat
+
+-- | Convert Pandoc inline element to haddock.
+inlineToHaddock :: PandocMonad m
+ => WriterOptions -> Inline -> StateT WriterState m Doc
+inlineToHaddock opts (Span (ident,_,_) ils) = do
+ contents <- inlineListToHaddock opts ils
+ if not (null ident) && null ils
+ then return $ "#" <> text ident <> "#"
+ else return contents
+inlineToHaddock opts (Emph lst) = do
+ contents <- inlineListToHaddock opts lst
+ return $ "/" <> contents <> "/"
+inlineToHaddock opts (Strong lst) = do
+ contents <- inlineListToHaddock opts lst
+ return $ "__" <> contents <> "__"
+inlineToHaddock opts (Strikeout lst) = do
+ contents <- inlineListToHaddock opts lst
+ -- not supported in haddock, but we fake it:
+ return $ "~~" <> contents <> "~~"
+-- not supported in haddock:
+inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst
+-- not supported in haddock:
+inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst
+-- not supported in haddock:
+inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst
+inlineToHaddock opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToHaddock opts lst
+ return $ "‘" <> contents <> "’"
+inlineToHaddock opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToHaddock opts lst
+ return $ "“" <> contents <> "”"
+inlineToHaddock _ (Code _ str) =
+ return $ "@" <> text (escapeString str) <> "@"
+inlineToHaddock _ (Str str) = do
+ return $ text $ escapeString str
+inlineToHaddock opts (Math mt str) = do
+ let adjust x = case mt of
+ DisplayMath -> cr <> x <> cr
+ InlineMath -> x
+ adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts)
+inlineToHaddock _ il@(RawInline f str)
+ | f == "haddock" = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+-- no line break in haddock (see above on CodeBlock)
+inlineToHaddock _ LineBreak = return cr
+inlineToHaddock opts SoftBreak =
+ case writerWrapText opts of
+ WrapAuto -> return space
+ WrapNone -> return space
+ WrapPreserve -> return cr
+inlineToHaddock _ Space = return space
+inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst
+inlineToHaddock _ (Link _ txt (src, _)) = do
+ let linktext = text $ escapeString $ stringify txt
+ let useAuto = isURI src &&
+ case txt of
+ [Str s] | escapeURI s == src -> True
+ _ -> False
+ return $ nowrap $ "<" <> text src <>
+ (if useAuto then empty else space <> linktext) <> ">"
+inlineToHaddock opts (Image attr alternate (source, tit)) = do
+ linkhaddock <- inlineToHaddock opts (Link attr alternate (source, tit))
+ return $ "<" <> linkhaddock <> ">"
+-- haddock doesn't have notes, but we can fake it:
+inlineToHaddock opts (Note contents) = do
+ modify (\st -> st{ stNotes = contents : stNotes st })
+ st <- get
+ let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st)
+ return $ "<#notes [" <> ref <> "]>"
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
new file mode 100644
index 000000000..efec17d26
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -0,0 +1,584 @@
+{-# LANGUAGE OverloadedStrings, FlexibleContexts, ScopedTypeVariables #-}
+
+{- |
+ Module : Text.Pandoc.Writers.ICML
+ Copyright : Copyright (C) 2013-2016 github.com/mb21
+ License : GNU GPL, version 2 or above
+
+ Stability : alpha
+
+Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format
+which is a subset of the zipped IDML format for which the documentation is
+available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf
+InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated
+into InDesign with File -> Place.
+-}
+module Text.Pandoc.Writers.ICML (writeICML) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.XML
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Shared (linesToPara, splitBy)
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse)
+import Data.Text as Text (breakOnAll, pack)
+import Control.Monad.State
+import Control.Monad.Except (runExceptT)
+import Network.URI (isURI)
+import qualified Data.Set as Set
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import qualified Text.Pandoc.Class as P
+
+type Style = [String]
+type Hyperlink = [(Int, String)]
+
+data WriterState = WriterState{
+ blockStyles :: Set.Set String
+ , inlineStyles :: Set.Set String
+ , links :: Hyperlink
+ , listDepth :: Int
+ , maxListDepth :: Int
+ }
+
+type WS m = StateT WriterState m
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+ blockStyles = Set.empty
+ , inlineStyles = Set.empty
+ , links = []
+ , listDepth = 1
+ , maxListDepth = 0
+ }
+
+-- inline names (appear in InDesign's character styles pane)
+emphName :: String
+strongName :: String
+strikeoutName :: String
+superscriptName :: String
+subscriptName :: String
+smallCapsName :: String
+codeName :: String
+linkName :: String
+emphName = "Italic"
+strongName = "Bold"
+strikeoutName = "Strikeout"
+superscriptName = "Superscript"
+subscriptName = "Subscript"
+smallCapsName = "SmallCaps"
+codeName = "Code"
+linkName = "Link"
+
+-- block element names (appear in InDesign's paragraph styles pane)
+paragraphName :: String
+figureName :: String
+imgCaptionName :: String
+codeBlockName :: String
+blockQuoteName :: String
+orderedListName :: String
+bulletListName :: String
+defListTermName :: String
+defListDefName :: String
+headerName :: String
+tableName :: String
+tableHeaderName :: String
+tableCaptionName :: String
+alignLeftName :: String
+alignRightName :: String
+alignCenterName :: String
+firstListItemName :: String
+beginsWithName :: String
+lowerRomanName :: String
+upperRomanName :: String
+lowerAlphaName :: String
+upperAlphaName :: String
+subListParName :: String
+footnoteName :: String
+citeName :: String
+paragraphName = "Paragraph"
+figureName = "Figure"
+imgCaptionName = "Caption"
+codeBlockName = "CodeBlock"
+blockQuoteName = "Blockquote"
+orderedListName = "NumList"
+bulletListName = "BulList"
+defListTermName = "DefListTerm"
+defListDefName = "DefListDef"
+headerName = "Header"
+tableName = "TablePar"
+tableHeaderName = "TableHeader"
+tableCaptionName = "TableCaption"
+alignLeftName = "LeftAlign"
+alignRightName = "RightAlign"
+alignCenterName = "CenterAlign"
+firstListItemName = "first"
+beginsWithName = "beginsWith-"
+lowerRomanName = "lowerRoman"
+upperRomanName = "upperRoman"
+lowerAlphaName = "lowerAlpha"
+upperAlphaName = "upperAlpha"
+subListParName = "subParagraph"
+footnoteName = "Footnote"
+citeName = "Cite"
+
+-- | Convert Pandoc document to string in ICML format.
+writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeICML opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState
+ metadata <- metaToJSON opts
+ (renderMeta blocksToICML)
+ (renderMeta inlinesToICML)
+ meta
+ (doc, st) <- runStateT (blocksToICML opts [] blocks) defaultWriterState
+ let main = render' doc
+ context = defField "body" main
+ $ defField "charStyles" (render' $ charStylesToDoc st)
+ $ defField "parStyles" (render' $ parStylesToDoc st)
+ $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
+ $ metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Auxilary functions for parStylesToDoc and charStylesToDoc.
+contains :: String -> (String, (String, String)) -> [(String, String)]
+contains s rule =
+ if isInfixOf (fst rule) s
+ then [snd rule]
+ else []
+
+-- | The monospaced font to use as default.
+monospacedFont :: Doc
+monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New"
+
+-- | How much to indent blockquotes etc.
+defaultIndent :: Int
+defaultIndent = 20
+
+-- | How much to indent numbered lists before the number.
+defaultListIndent :: Int
+defaultListIndent = 10
+
+-- other constants
+lineSeparator :: String
+lineSeparator = "&#x2028;"
+
+-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles.
+parStylesToDoc :: WriterState -> Doc
+parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
+ where
+ makeStyle s =
+ let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str)
+ attrs = concat $ map (contains s) $ [
+ (defListTermName, ("BulletsAndNumberingListType", "BulletList"))
+ , (defListTermName, ("FontStyle", "Bold"))
+ , (tableHeaderName, ("FontStyle", "Bold"))
+ , (alignLeftName, ("Justification", "LeftAlign"))
+ , (alignRightName, ("Justification", "RightAlign"))
+ , (alignCenterName, ("Justification", "CenterAlign"))
+ , (headerName++"1", ("PointSize", "36"))
+ , (headerName++"2", ("PointSize", "30"))
+ , (headerName++"3", ("PointSize", "24"))
+ , (headerName++"4", ("PointSize", "18"))
+ , (headerName++"5", ("PointSize", "14"))
+ ]
+ -- what is the most nested list type, if any?
+ (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s
+ where
+ findList [] = (False, False)
+ findList (x:xs) | x == bulletListName = (True, False)
+ | x == orderedListName = (False, True)
+ | otherwise = findList xs
+ nBuls = countSubStrs bulletListName s
+ nOrds = countSubStrs orderedListName s
+ attrs' = numbering ++ listType ++ indent ++ attrs
+ where
+ numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)]
+ | otherwise = []
+ listType | isOrderedList && (not $ isInfixOf subListParName s)
+ = [("BulletsAndNumberingListType", "NumberedList")]
+ | isBulletList && (not $ isInfixOf subListParName s)
+ = [("BulletsAndNumberingListType", "BulletList")]
+ | otherwise = []
+ indent = [("LeftIndent", show indt)]
+ where
+ nBlockQuotes = countSubStrs blockQuoteName s
+ nDefLists = countSubStrs defListDefName s
+ indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists)
+ props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm)
+ where
+ font = if isInfixOf codeBlockName s
+ then monospacedFont
+ else empty
+ basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font
+ tabList = if isBulletList
+ then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")]
+ $ vcat [
+ inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign"
+ , inTags False "AlignmentCharacter" [("type","string")] $ text "."
+ , selfClosingTag "Leader" [("type","string")]
+ , inTags False "Position" [("type","unit")] $ text
+ $ show $ defaultListIndent * (nBuls + nOrds)
+ ]
+ else empty
+ makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name)
+ numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..."
+ | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..."
+ | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..."
+ | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..."
+ | otherwise = empty
+ in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles.
+charStylesToDoc :: WriterState -> Doc
+charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
+ where
+ makeStyle s =
+ let attrs = concat $ map (contains s) [
+ (strikeoutName, ("StrikeThru", "true"))
+ , (superscriptName, ("Position", "Superscript"))
+ , (subscriptName, ("Position", "Subscript"))
+ , (smallCapsName, ("Capitalization", "SmallCaps"))
+ ]
+ attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs
+ | isInfixOf strongName s = ("FontStyle", "Bold") : attrs
+ | isInfixOf emphName s = ("FontStyle", "Italic") : attrs
+ | otherwise = attrs
+ props = inTags True "Properties" [] $
+ inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font
+ where
+ font =
+ if isInfixOf codeName s
+ then monospacedFont
+ else empty
+ in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Escape colon characters as %3a
+escapeColons :: String -> String
+escapeColons (x:xs)
+ | x == ':' = "%3a" ++ escapeColons xs
+ | otherwise = x : escapeColons xs
+escapeColons [] = []
+
+-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks.
+hyperlinksToDoc :: Hyperlink -> Doc
+hyperlinksToDoc [] = empty
+hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
+ where
+ hyp (ident, url) = hdest $$ hlink
+ where
+ hdest = selfClosingTag "HyperlinkURLDestination"
+ [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6
+ hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url),
+ ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")]
+ $ inTags True "Properties" []
+ $ inTags False "BorderColor" [("type","enumeration")] (text "Black")
+ $$ (inTags False "Destination" [("type","object")]
+ $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6
+
+
+-- | Convert a list of Pandoc blocks to ICML.
+blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
+blocksToICML opts style lst = do
+ docs <- mapM (blockToICML opts style) lst
+ return $ intersperseBrs docs
+
+-- | Convert a Pandoc block element to ICML.
+blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc
+blockToICML opts style (Plain lst) = parStyle opts style lst
+-- title beginning with fig: indicates that the image is a figure
+blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do
+ figure <- parStyle opts (figureName:style) img
+ caption <- parStyle opts (imgCaptionName:style) txt
+ return $ intersperseBrs [figure, caption]
+blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
+blockToICML opts style (LineBlock lns) =
+ blockToICML opts style $ linesToPara lns
+blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
+blockToICML _ _ b@(RawBlock f str)
+ | f == Format "icml" = return $ text str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
+blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
+blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
+blockToICML opts style (DefinitionList lst) = intersperseBrs `fmap` mapM (definitionListItemToICML opts style) lst
+blockToICML opts style (Header lvl _ lst) =
+ let stl = (headerName ++ show lvl):style
+ in parStyle opts stl lst
+blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead
+blockToICML opts style (Table caption aligns widths headers rows) =
+ let style' = tableName : style
+ noHeader = all null headers
+ nrHeaders = if noHeader
+ then "0"
+ else "1"
+ nrRows = length rows
+ nrCols = if null rows
+ then 0
+ else length $ head rows
+ rowsToICML [] _ = return empty
+ rowsToICML (col:rest) rowNr =
+ liftM2 ($$) (colsToICML col aligns rowNr (0::Int)) $ rowsToICML rest (rowNr+1)
+ colsToICML [] _ _ _ = return empty
+ colsToICML _ [] _ _ = return empty
+ colsToICML (cell:rest) (alig:restAligns) rowNr colNr = do
+ let stl = if rowNr == 0 && not noHeader
+ then tableHeaderName:style'
+ else style'
+ stl' | alig == AlignLeft = alignLeftName : stl
+ | alig == AlignRight = alignRightName : stl
+ | alig == AlignCenter = alignCenterName : stl
+ | otherwise = stl
+ c <- blocksToICML opts stl' cell
+ let cl = return $ inTags True "Cell"
+ [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c
+ liftM2 ($$) cl $ colsToICML rest restAligns rowNr (colNr+1)
+ in do
+ let tabl = if noHeader
+ then rows
+ else headers:rows
+ cells <- rowsToICML tabl (0::Int)
+ let colWidths w = if w > 0
+ then [("SingleColumnWidth",show $ 500 * w)]
+ else []
+ let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup)
+ let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths
+ let tableDoc = return $ inTags True "Table" [
+ ("AppliedTableStyle","TableStyle/Table")
+ , ("HeaderRowCount", nrHeaders)
+ , ("BodyRowCount", show nrRows)
+ , ("ColumnCount", show nrCols)
+ ] (colDescs $$ cells)
+ liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption
+blockToICML opts style (Div _ lst) = blocksToICML opts style lst
+blockToICML _ _ Null = return empty
+
+-- | Convert a list of lists of blocks to ICML list items.
+listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc
+listItemsToICML _ _ _ _ [] = return empty
+listItemsToICML opts listType style attribs (first:rest) = do
+ st <- get
+ put st{ listDepth = 1 + listDepth st}
+ let stl = listType:style
+ let f = listItemToICML opts stl True attribs first
+ let r = map (listItemToICML opts stl False attribs) rest
+ docs <- sequence $ f:r
+ s <- get
+ let maxD = max (maxListDepth s) (listDepth s)
+ put s{ listDepth = 1, maxListDepth = maxD }
+ return $ intersperseBrs docs
+
+-- | Convert a list of blocks to ICML list items.
+listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc
+listItemToICML opts style isFirst attribs item =
+ let makeNumbStart (Just (beginsWith, numbStl, _)) =
+ let doN DefaultStyle = []
+ doN LowerRoman = [lowerRomanName]
+ doN UpperRoman = [upperRomanName]
+ doN LowerAlpha = [lowerAlphaName]
+ doN UpperAlpha = [upperAlphaName]
+ doN _ = []
+ bw = if beginsWith > 1
+ then [beginsWithName ++ show beginsWith]
+ else []
+ in doN numbStl ++ bw
+ makeNumbStart Nothing = []
+ stl = if isFirst
+ then firstListItemName:style
+ else style
+ stl' = makeNumbStart attribs ++ stl
+ in if length item > 1
+ then do
+ let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst
+ insertTab block = blockToICML opts style block
+ f <- blockToICML opts stl' $ head item
+ r <- mapM insertTab $ tail item
+ return $ intersperseBrs (f : r)
+ else blocksToICML opts stl' item
+
+definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc
+definitionListItemToICML opts style (term,defs) = do
+ term' <- parStyle opts (defListTermName:style) term
+ defs' <- mapM (blocksToICML opts (defListDefName:style)) defs
+ return $ intersperseBrs $ (term' : defs')
+
+
+-- | Convert a list of inline elements to ICML.
+inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
+inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
+
+-- | Convert an inline element to ICML.
+inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc
+inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst
+inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst
+inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst
+inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst
+inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst
+inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
+inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
+inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
+inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst
+inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
+inlineToICML _ style Space = charStyle style space
+inlineToICML opts style SoftBreak =
+ case writerWrapText opts of
+ WrapAuto -> charStyle style space
+ WrapNone -> charStyle style space
+ WrapPreserve -> charStyle style cr
+inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
+inlineToICML opts style (Math mt str) =
+ lift (texMathToInlines mt str) >>=
+ (fmap cat . mapM (inlineToICML opts style))
+inlineToICML _ _ il@(RawInline f str)
+ | f == Format "icml" = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToICML opts style (Link _ lst (url, title)) = do
+ content <- inlinesToICML opts (linkName:style) lst
+ state $ \st ->
+ let ident = if null $ links st
+ then 1::Int
+ else 1 + (fst $ head $ links st)
+ newst = st{ links = (ident, url):(links st) }
+ cont = inTags True "HyperlinkTextSource"
+ [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content
+ in (cont, newst)
+inlineToICML opts style (Image attr _ target) = imageICML opts style attr target
+inlineToICML opts style (Note lst) = footnoteToICML opts style lst
+inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst
+
+-- | Convert a list of block elements to an ICML footnote.
+footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc
+footnoteToICML opts style lst =
+ let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls
+ insertTab block = blockToICML opts (footnoteName:style) block
+ in do
+ contents <- mapM insertTab lst
+ let number = inTags True "ParagraphStyleRange" [] $
+ inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>"
+ return $ inTags True "CharacterStyleRange"
+ [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")]
+ $ inTags True "Footnote" [] $ number $$ intersperseBrs contents
+
+-- | Auxiliary function to merge Space elements into the adjacent Strs.
+mergeSpaces :: [Inline] -> [Inline]
+mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x =
+ mergeSpaces $ Str(s++" "++s') : xs
+mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs
+mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs
+mergeSpaces (x:xs) = x : (mergeSpaces xs)
+mergeSpaces [] = []
+
+isSp :: Inline -> Bool
+isSp Space = True
+isSp SoftBreak = True
+isSp _ = False
+
+-- | Intersperse line breaks
+intersperseBrs :: [Doc] -> Doc
+intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty)
+
+-- | Wrap a list of inline elements in an ICML Paragraph Style
+parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc
+parStyle opts style lst =
+ let slipIn x y = if null y
+ then x
+ else x ++ " > " ++ y
+ stlStr = foldr slipIn [] $ reverse style
+ stl = if null stlStr
+ then ""
+ else "ParagraphStyle/" ++ stlStr
+ attrs = ("AppliedParagraphStyle", stl)
+ attrs' = if firstListItemName `elem` style
+ then let ats = attrs : [("NumberingContinue", "false")]
+ begins = filter (isPrefixOf beginsWithName) style
+ in if null begins
+ then ats
+ else let i = maybe "" id $ stripPrefix beginsWithName $ head begins
+ in ("NumberingStartAt", i) : ats
+ else [attrs]
+ in do
+ content <- inlinesToICML opts [] lst
+ let cont = inTags True "ParagraphStyleRange" attrs' content
+ state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st })
+
+-- | Wrap a Doc in an ICML Character Style.
+charStyle :: PandocMonad m => Style -> Doc -> WS m Doc
+charStyle style content =
+ let (stlStr, attrs) = styleToStrAttr style
+ doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
+ in do
+ state $ \st ->
+ let styles = if null stlStr
+ then st
+ else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
+ in (doc, styles)
+
+-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute.
+styleToStrAttr :: Style -> (String, [(String, String)])
+styleToStrAttr style =
+ let stlStr = unwords $ Set.toAscList $ Set.fromList style
+ stl = if null style
+ then "$ID/NormalCharacterStyle"
+ else "CharacterStyle/" ++ stlStr
+ attrs = [("AppliedCharacterStyle", stl)]
+ in (stlStr, attrs)
+
+-- | Assemble an ICML Image.
+imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc
+imageICML opts style attr (src, _) = do
+ res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
+ imgS <- case res of
+ Left (_ :: PandocError) -> do
+ report $ CouldNotFetchResource src ""
+ return def
+ Right (img, _) -> do
+ case imageSize img of
+ Right size -> return size
+ Left msg -> do
+ report $ CouldNotDetermineImageSize src msg
+ return def
+ let (ow, oh) = sizeInPoints imgS
+ (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS
+ hw = showFl $ ow / 2
+ hh = showFl $ oh / 2
+ scale = showFl (imgWidth / ow) ++ " 0 0 " ++ showFl (imgHeight / oh)
+ src' = if isURI src then src else "file:" ++ src
+ (stlStr, attrs) = styleToStrAttr style
+ props = inTags True "Properties" [] $ inTags True "PathGeometry" []
+ $ inTags True "GeometryPathType" [("PathOpen","false")]
+ $ inTags True "PathPointArray" []
+ $ vcat [
+ selfClosingTag "PathPointType" [("Anchor", "-"++hw++" -"++hh),
+ ("LeftDirection", "-"++hw++" -"++hh), ("RightDirection", "-"++hw++" -"++hh)]
+ , selfClosingTag "PathPointType" [("Anchor", "-"++hw++" "++hh),
+ ("LeftDirection", "-"++hw++" "++hh), ("RightDirection", "-"++hw++" "++hh)]
+ , selfClosingTag "PathPointType" [("Anchor", hw++" "++hh),
+ ("LeftDirection", hw++" "++hh), ("RightDirection", hw++" "++hh)]
+ , selfClosingTag "PathPointType" [("Anchor", hw++" -"++hh),
+ ("LeftDirection", hw++" -"++hh), ("RightDirection", hw++" -"++hh)]
+ ]
+ image = inTags True "Image"
+ [("Self","ue6"), ("ItemTransform", scale++" -"++hw++" -"++hh)]
+ $ vcat [
+ inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+ , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')]
+ ]
+ doc = inTags True "CharacterStyleRange" attrs
+ $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"),
+ ("ItemTransform", scale++" "++hw++" -"++hh)]
+ $ (props $$ image)
+ state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } )
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
new file mode 100644
index 000000000..ac2b5d758
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -0,0 +1,1388 @@
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables,
+ PatternGuards #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.LaTeX
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX (
+ writeLaTeX
+ , writeBeamer
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates
+import Text.Pandoc.Logging
+import Text.Printf ( printf )
+import Network.URI ( isURI, unEscapeString )
+import Data.Aeson (object, (.=), FromJSON)
+import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse,
+ nub, nubBy, foldl' )
+import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit,
+ ord, isAlphaNum )
+import Data.Maybe ( fromMaybe, isJust, catMaybes )
+import qualified Data.Text as T
+import Control.Applicative ((<|>))
+import Control.Monad.State
+import qualified Text.Parsec as P
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
+ formatLaTeXInline, formatLaTeXBlock,
+ toListingsLanguage)
+import Text.Pandoc.Class (PandocMonad, report)
+
+data WriterState =
+ WriterState { stInNote :: Bool -- true if we're in a note
+ , stInQuote :: Bool -- true if in a blockquote
+ , stInMinipage :: Bool -- true if in minipage
+ , stInHeading :: Bool -- true if in a section heading
+ , stNotes :: [Doc] -- notes in a minipage
+ , stOLLevel :: Int -- level of ordered list nesting
+ , stOptions :: WriterOptions -- writer options, so they don't have to be parameter
+ , stVerbInNote :: Bool -- true if document has verbatim text in note
+ , stTable :: Bool -- true if document has a table
+ , stStrikeout :: Bool -- true if document has strikeout
+ , stUrl :: Bool -- true if document has visible URL link
+ , stGraphics :: Bool -- true if document contains images
+ , stLHS :: Bool -- true if document has literate haskell code
+ , stBook :: Bool -- true if document uses book or memoir class
+ , stCsquotes :: Bool -- true if document uses csquotes
+ , stHighlighting :: Bool -- true if document has highlighted code
+ , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit
+ , stInternalLinks :: [String] -- list of internal link targets
+ , stUsesEuro :: Bool -- true if euro symbol used
+ , stBeamer :: Bool -- produce beamer
+ }
+
+startingState :: WriterOptions -> WriterState
+startingState options = WriterState {
+ stInNote = False
+ , stInQuote = False
+ , stInMinipage = False
+ , stInHeading = False
+ , stNotes = []
+ , stOLLevel = 1
+ , stOptions = options
+ , stVerbInNote = False
+ , stTable = False
+ , stStrikeout = False
+ , stUrl = False
+ , stGraphics = False
+ , stLHS = False
+ , stBook = (case writerTopLevelDivision options of
+ TopLevelPart -> True
+ TopLevelChapter -> True
+ _ -> False)
+ , stCsquotes = False
+ , stHighlighting = False
+ , stIncremental = writerIncremental options
+ , stInternalLinks = []
+ , stUsesEuro = False
+ , stBeamer = False }
+
+-- | Convert Pandoc to LaTeX.
+writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeLaTeX options document =
+ evalStateT (pandocToLaTeX options document) $
+ startingState options
+
+-- | Convert Pandoc to LaTeX Beamer.
+writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeBeamer options document =
+ evalStateT (pandocToLaTeX options document) $
+ (startingState options){ stBeamer = True }
+
+type LW m = StateT WriterState m
+
+pandocToLaTeX :: PandocMonad m
+ => WriterOptions -> Pandoc -> LW m String
+pandocToLaTeX options (Pandoc meta blocks) = do
+ -- Strip off final 'references' header if --natbib or --biblatex
+ let method = writerCiteMethod options
+ let blocks' = if method == Biblatex || method == Natbib
+ then case reverse blocks of
+ (Div (_,["references"],_) _):xs -> reverse xs
+ _ -> blocks
+ else blocks
+ -- see if there are internal links
+ let isInternalLink (Link _ _ ('#':xs,_)) = [xs]
+ isInternalLink _ = []
+ modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
+ let template = maybe "" id $ writerTemplate options
+ -- set stBook depending on documentclass
+ let colwidth = if writerWrapText options == WrapAuto
+ then Just $ writerColumns options
+ else Nothing
+ metadata <- metaToJSON options
+ (fmap (render colwidth) . blockListToLaTeX)
+ (fmap (render colwidth) . inlineListToLaTeX)
+ meta
+ let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
+ let documentClass = case P.parse pDocumentClass "template" template of
+ Right r -> r
+ Left _ -> ""
+ case lookup "documentclass" (writerVariables options) `mplus`
+ fmap stringify (lookupMeta "documentclass" meta) of
+ Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
+ | otherwise -> return ()
+ Nothing | documentClass `elem` bookClasses
+ -> modify $ \s -> s{stBook = True}
+ | otherwise -> return ()
+ -- check for \usepackage...{csquotes}; if present, we'll use
+ -- \enquote{...} for smart quotes:
+ let headerIncludesField :: FromJSON a => Maybe a
+ headerIncludesField = getField "header-includes" metadata
+ let headerIncludes = fromMaybe [] $ mplus
+ (fmap return headerIncludesField)
+ headerIncludesField
+ when (any (isInfixOf "{csquotes}") (template : headerIncludes)) $
+ modify $ \s -> s{stCsquotes = True}
+ let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
+ (blocks', [])
+ else case last blocks' of
+ Header 1 _ il -> (init blocks', il)
+ _ -> (blocks', [])
+ beamer <- gets stBeamer
+ blocks''' <- if beamer
+ then toSlides blocks''
+ else return blocks''
+ body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'''
+ (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
+ let main = render colwidth $ vsep body
+ st <- get
+ titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
+ authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
+ let docLangs = nub $ query (extract "lang") blocks
+ let hasStringValue x = isJust (getField x metadata :: Maybe String)
+ let geometryFromMargins = intercalate [','] $ catMaybes $
+ map (\(x,y) ->
+ ((x ++ "=") ++) <$> getField y metadata)
+ [("lmargin","margin-left")
+ ,("rmargin","margin-right")
+ ,("tmargin","margin-top")
+ ,("bmargin","margin-bottom")
+ ]
+ let context = defField "toc" (writerTableOfContents options) $
+ defField "toc-depth" (show (writerTOCDepth options -
+ if stBook st
+ then 1
+ else 0)) $
+ defField "body" main $
+ defField "title-meta" titleMeta $
+ defField "author-meta" (intercalate "; " authorsMeta) $
+ defField "documentclass" (if beamer
+ then ("beamer" :: String)
+ else if stBook st
+ then "book"
+ else "article") $
+ defField "verbatim-in-note" (stVerbInNote st) $
+ defField "tables" (stTable st) $
+ defField "strikeout" (stStrikeout st) $
+ defField "url" (stUrl st) $
+ defField "numbersections" (writerNumberSections options) $
+ defField "lhs" (stLHS st) $
+ defField "graphics" (stGraphics st) $
+ defField "book-class" (stBook st) $
+ defField "euro" (stUsesEuro st) $
+ defField "listings" (writerListings options || stLHS st) $
+ defField "beamer" beamer $
+ (if stHighlighting st
+ then case writerHighlightStyle options of
+ Just sty ->
+ defField "highlighting-macros"
+ (styleToLaTeX sty)
+ Nothing -> id
+ else id) $
+ (case writerCiteMethod options of
+ Natbib -> defField "biblio-title" biblioTitle .
+ defField "natbib" True
+ Biblatex -> defField "biblio-title" biblioTitle .
+ defField "biblatex" True
+ _ -> id) $
+ -- set lang to something so polyglossia/babel is included
+ defField "lang" (if null docLangs then ""::String else "en") $
+ defField "otherlangs" docLangs $
+ defField "colorlinks" (any hasStringValue
+ ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
+ defField "dir" (if (null $ query (extract "dir") blocks)
+ then ""::String
+ else "ltr") $
+ defField "section-titles" True $
+ defField "geometry" geometryFromMargins $
+ metadata
+ let toPolyObj lang = object [ "name" .= T.pack name
+ , "options" .= T.pack opts ]
+ where
+ (name, opts) = toPolyglossia lang
+ let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
+ otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
+ let context' =
+ defField "babel-lang" (toBabel lang)
+ $ defField "babel-otherlangs" (map toBabel otherlangs)
+ $ defField "babel-newcommands" (concatMap (\(poly, babel) ->
+ -- \textspanish and \textgalician are already used by babel
+ -- save them as \oritext... and let babel use that
+ if poly `elem` ["spanish", "galician"]
+ then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
+ "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
+ "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
+ ++ poly ++ "}}\n" ++
+ "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
+ "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+ ++ poly ++ "}{##2}}}\n"
+ else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+ ++ babel ++ "}{#2}}\n" ++
+ "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{"
+ ++ babel ++ "}}{\\end{otherlanguage}}\n"
+ )
+ -- eliminate duplicates that have same polyglossia name
+ $ nubBy (\a b -> fst a == fst b)
+ -- find polyglossia and babel names of languages used in the document
+ $ map (\l ->
+ let lng = splitBy (=='-') l
+ in (fst $ toPolyglossia lng, toBabel lng)
+ )
+ docLangs )
+ $ defField "polyglossia-lang" (toPolyObj lang)
+ $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
+ $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
+ Just "rtl" -> True
+ _ -> False)
+ $ context
+ return $ case writerTemplate options of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context'
+
+-- | Convert Elements to LaTeX
+elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc
+elementToLaTeX _ (Blk block) = blockToLaTeX block
+elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do
+ modify $ \s -> s{stInHeading = True}
+ header' <- sectionHeader ("unnumbered" `elem` classes) id' level title'
+ modify $ \s -> s{stInHeading = False}
+ innerContents <- mapM (elementToLaTeX opts) elements
+ return $ vsep (header' : innerContents)
+
+data StringContext = TextString
+ | URLString
+ | CodeString
+ deriving (Eq)
+
+-- escape things as needed for LaTeX
+stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String
+stringToLaTeX _ [] = return ""
+stringToLaTeX ctx (x:xs) = do
+ opts <- gets stOptions
+ rest <- stringToLaTeX ctx xs
+ let ligatures = isEnabled Ext_smart opts && ctx == TextString
+ let isUrl = ctx == URLString
+ when (x == '€') $
+ modify $ \st -> st{ stUsesEuro = True }
+ return $
+ case x of
+ '€' -> "\\euro{}" ++ rest
+ '{' -> "\\{" ++ rest
+ '}' -> "\\}" ++ rest
+ '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest
+ '$' | not isUrl -> "\\$" ++ rest
+ '%' -> "\\%" ++ rest
+ '&' -> "\\&" ++ rest
+ '_' | not isUrl -> "\\_" ++ rest
+ '#' -> "\\#" ++ rest
+ '-' | not isUrl -> case xs of
+ -- prevent adjacent hyphens from forming ligatures
+ ('-':_) -> "-\\/" ++ rest
+ _ -> '-' : rest
+ '~' | not isUrl -> "\\textasciitilde{}" ++ rest
+ '^' -> "\\^{}" ++ rest
+ '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows
+ | otherwise -> "\\textbackslash{}" ++ rest
+ '|' | not isUrl -> "\\textbar{}" ++ rest
+ '<' -> "\\textless{}" ++ rest
+ '>' -> "\\textgreater{}" ++ rest
+ '[' -> "{[}" ++ rest -- to avoid interpretation as
+ ']' -> "{]}" ++ rest -- optional arguments
+ '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest
+ '\160' -> "~" ++ rest
+ '\x202F' -> "\\," ++ rest
+ '\x2026' -> "\\ldots{}" ++ rest
+ '\x2018' | ligatures -> "`" ++ rest
+ '\x2019' | ligatures -> "'" ++ rest
+ '\x201C' | ligatures -> "``" ++ rest
+ '\x201D' | ligatures -> "''" ++ rest
+ '\x2014' | ligatures -> "---" ++ rest
+ '\x2013' | ligatures -> "--" ++ rest
+ _ -> x : rest
+
+toLabel :: PandocMonad m => String -> LW m String
+toLabel z = go `fmap` stringToLaTeX URLString z
+ where go [] = ""
+ go (x:xs)
+ | (isLetter x || isDigit x) && isAscii x = x:go xs
+ | elem x ("_-+=:;." :: String) = x:go xs
+ | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs
+
+-- | Puts contents into LaTeX command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '\\' <> text cmd <> braces contents
+
+toSlides :: PandocMonad m => [Block] -> LW m [Block]
+toSlides bs = do
+ opts <- gets stOptions
+ let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts
+ let bs' = prepSlides slideLevel bs
+ concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
+
+elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block]
+elementToBeamer _slideLevel (Blk b) = return [b]
+elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
+ | lvl > slideLevel = do
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ Para ( RawInline "latex" "\\begin{block}{"
+ : tit ++ [RawInline "latex" "}"] )
+ : bs ++ [RawBlock "latex" "\\end{block}"]
+ | lvl < slideLevel = do
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ (Header lvl (ident,classes,kvs) tit) : bs
+ | otherwise = do -- lvl == slideLevel
+ -- note: [fragile] is required or verbatim breaks
+ let hasCodeBlock (CodeBlock _ _) = [True]
+ hasCodeBlock _ = []
+ let hasCode (Code _ _) = [True]
+ hasCode _ = []
+ let fragile = "fragile" `elem` classes ||
+ not (null $ query hasCodeBlock elts ++ query hasCode elts)
+ let frameoptions = ["allowdisplaybreaks", "allowframebreaks",
+ "b", "c", "t", "environment",
+ "label", "plain", "shrink", "standout"]
+ let optionslist = ["fragile" | fragile] ++
+ [k | k <- classes, k `elem` frameoptions] ++
+ [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions]
+ let options = if null optionslist
+ then ""
+ else "[" ++ intercalate "," optionslist ++ "]"
+ let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) :
+ if tit == [Str "\0"] -- marker for hrule
+ then []
+ else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"]
+ let slideEnd = RawBlock "latex" "\\end{frame}"
+ -- now carve up slide into blocks if there are sections inside
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ slideStart : bs ++ [slideEnd]
+
+isListBlock :: Block -> Bool
+isListBlock (BulletList _) = True
+isListBlock (OrderedList _ _) = True
+isListBlock (DefinitionList _) = True
+isListBlock _ = False
+
+isLineBreakOrSpace :: Inline -> Bool
+isLineBreakOrSpace LineBreak = True
+isLineBreakOrSpace SoftBreak = True
+isLineBreakOrSpace Space = True
+isLineBreakOrSpace _ = False
+
+-- | Convert Pandoc block element to LaTeX.
+blockToLaTeX :: PandocMonad m
+ => Block -- ^ Block to convert
+ -> LW m Doc
+blockToLaTeX Null = return empty
+blockToLaTeX (Div (identifier,classes,kvs) bs) = do
+ beamer <- gets stBeamer
+ ref <- toLabel identifier
+ let linkAnchor = if null identifier
+ then empty
+ else "\\hypertarget" <> braces (text ref) <>
+ braces empty
+ let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+ let wrapDir = case lookup "dir" kvs of
+ Just "rtl" -> align "RTL"
+ Just "ltr" -> align "LTR"
+ _ -> id
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> let (l, o) = toPolyglossiaEnv lng
+ ops = if null o
+ then ""
+ else brackets $ text o
+ in inCmd "begin" (text l) <> ops
+ $$ blankline <> txt <> blankline
+ $$ inCmd "end" (text l)
+ Nothing -> txt
+ wrapNotes txt = if beamer && "notes" `elem` classes
+ then "\\note" <> braces txt -- speaker notes
+ else linkAnchor $$ txt
+ fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
+blockToLaTeX (Plain lst) =
+ inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
+-- title beginning with fig: indicates that the image is a figure
+blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do
+ inNote <- gets stInNote
+ modify $ \st -> st{ stInMinipage = True, stNotes = [] }
+ capt <- inlineListToLaTeX txt
+ notes <- gets stNotes
+ modify $ \st -> st{ stInMinipage = False, stNotes = [] }
+
+ -- We can't have footnotes in the list of figures, so remove them:
+ captForLof <- if null notes
+ then return empty
+ else brackets <$> inlineListToLaTeX (walk deNote txt)
+ img <- inlineToLaTeX (Image attr txt (src,tit))
+ let footnotes = notesToLaTeX notes
+ lab <- labelFor ident
+ let caption = "\\caption" <> captForLof <> braces capt <> lab
+ figure <- hypertarget ident (cr <>
+ "\\begin{figure}" $$ "\\centering" $$ img $$
+ caption $$ "\\end{figure}" <> cr)
+ return $ if inNote
+ -- can't have figures in notes
+ then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}"
+ else figure $$ footnotes
+-- . . . indicates pause in beamer slides
+blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do
+ beamer <- gets stBeamer
+ if beamer
+ then blockToLaTeX (RawBlock "latex" "\\pause")
+ else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."]
+blockToLaTeX (Para lst) =
+ inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
+blockToLaTeX (LineBlock lns) = do
+ blockToLaTeX $ linesToPara lns
+blockToLaTeX (BlockQuote lst) = do
+ beamer <- gets stBeamer
+ case lst of
+ [b] | beamer && isListBlock b -> do
+ oldIncremental <- gets stIncremental
+ modify $ \s -> s{ stIncremental = not oldIncremental }
+ result <- blockToLaTeX b
+ modify $ \s -> s{ stIncremental = oldIncremental }
+ return result
+ _ -> do
+ oldInQuote <- gets stInQuote
+ modify (\s -> s{stInQuote = True})
+ contents <- blockListToLaTeX lst
+ modify (\s -> s{stInQuote = oldInQuote})
+ return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
+blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do
+ opts <- gets stOptions
+ ref <- toLabel identifier
+ let linkAnchor = if null identifier
+ then empty
+ else "\\hypertarget" <> braces (text ref) <>
+ braces ("\\label" <> braces (text ref))
+ let lhsCodeBlock = do
+ modify $ \s -> s{ stLHS = True }
+ return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$
+ "\\end{code}") $$ cr
+ let rawCodeBlock = do
+ st <- get
+ env <- if stInNote st
+ then modify (\s -> s{ stVerbInNote = True }) >>
+ return "Verbatim"
+ else return "verbatim"
+ return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$
+ text str $$ text ("\\end{" ++ env ++ "}")) <> cr
+ let listingsCodeBlock = do
+ st <- get
+ let params = if writerListings (stOptions st)
+ then (case getListingsLanguage classes of
+ Just l -> [ "language=" ++ mbBraced l ]
+ Nothing -> []) ++
+ [ "numbers=left" | "numberLines" `elem` classes
+ || "number" `elem` classes
+ || "number-lines" `elem` classes ] ++
+ [ (if key == "startFrom"
+ then "firstnumber"
+ else key) ++ "=" ++ mbBraced attr |
+ (key,attr) <- keyvalAttr ] ++
+ (if identifier == ""
+ then []
+ else [ "label=" ++ ref ])
+
+ else []
+ printParams
+ | null params = empty
+ | otherwise = brackets $ hcat (intersperse ", "
+ (map text params))
+ return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
+ "\\end{lstlisting}") $$ cr
+ let highlightedCodeBlock =
+ case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
+ Nothing -> rawCodeBlock
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (flush $ linkAnchor $$ text (T.unpack h))
+ case () of
+ _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes &&
+ "literate" `elem` classes -> lhsCodeBlock
+ | writerListings opts -> listingsCodeBlock
+ | not (null classes) && isJust (writerHighlightStyle opts)
+ -> highlightedCodeBlock
+ | otherwise -> rawCodeBlock
+blockToLaTeX b@(RawBlock f x)
+ | f == Format "latex" || f == Format "tex"
+ = return $ text x
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToLaTeX (BulletList []) = return empty -- otherwise latex error
+blockToLaTeX (BulletList lst) = do
+ incremental <- gets stIncremental
+ beamer <- gets stBeamer
+ let inc = if beamer && incremental then "[<+->]" else ""
+ items <- mapM listItemToLaTeX lst
+ let spacing = if isTightList lst
+ then text "\\tightlist"
+ else empty
+ return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$
+ "\\end{itemize}"
+blockToLaTeX (OrderedList _ []) = return empty -- otherwise latex error
+blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
+ st <- get
+ let inc = if stIncremental st then "[<+->]" else ""
+ let oldlevel = stOLLevel st
+ put $ st {stOLLevel = oldlevel + 1}
+ items <- mapM listItemToLaTeX lst
+ modify (\s -> s {stOLLevel = oldlevel})
+ let tostyle x = case numstyle of
+ Decimal -> "\\arabic" <> braces x
+ UpperRoman -> "\\Roman" <> braces x
+ LowerRoman -> "\\roman" <> braces x
+ UpperAlpha -> "\\Alph" <> braces x
+ LowerAlpha -> "\\alph" <> braces x
+ Example -> "\\arabic" <> braces x
+ DefaultStyle -> "\\arabic" <> braces x
+ let todelim x = case numdelim of
+ OneParen -> x <> ")"
+ TwoParens -> parens x
+ Period -> x <> "."
+ _ -> x <> "."
+ let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel)
+ let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim
+ then empty
+ else "\\def" <> "\\label" <> enum <>
+ braces (todelim $ tostyle enum)
+ let resetcounter = if start == 1 || oldlevel > 4
+ then empty
+ else "\\setcounter" <> braces enum <>
+ braces (text $ show $ start - 1)
+ let spacing = if isTightList lst
+ then text "\\tightlist"
+ else empty
+ return $ text ("\\begin{enumerate}" ++ inc)
+ $$ stylecommand
+ $$ resetcounter
+ $$ spacing
+ $$ vcat items
+ $$ "\\end{enumerate}"
+blockToLaTeX (DefinitionList []) = return empty
+blockToLaTeX (DefinitionList lst) = do
+ incremental <- gets stIncremental
+ let inc = if incremental then "[<+->]" else ""
+ items <- mapM defListItemToLaTeX lst
+ let spacing = if all isTightList (map snd lst)
+ then text "\\tightlist"
+ else empty
+ return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$
+ "\\end{description}"
+blockToLaTeX HorizontalRule = return $
+ "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"
+blockToLaTeX (Header level (id',classes,_) lst) = do
+ modify $ \s -> s{stInHeading = True}
+ hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst
+ modify $ \s -> s{stInHeading = False}
+ return hdr
+blockToLaTeX (Table caption aligns widths heads rows) = do
+ headers <- if all null heads
+ then return empty
+ else do
+ contents <- (tableRowToLaTeX True aligns widths) heads
+ return ("\\toprule" $$ contents $$ "\\midrule")
+ let endhead = if all null heads
+ then empty
+ else text "\\endhead"
+ let endfirsthead = if all null heads
+ then empty
+ else text "\\endfirsthead"
+ captionText <- inlineListToLaTeX caption
+ let capt = if isEmpty captionText
+ then empty
+ else text "\\caption" <> braces captionText <> "\\tabularnewline"
+ $$ headers
+ $$ endfirsthead
+ rows' <- mapM (tableRowToLaTeX False aligns widths) rows
+ let colDescriptors = text $ concat $ map toColDescriptor aligns
+ modify $ \s -> s{ stTable = True }
+ return $ "\\begin{longtable}[]" <>
+ braces ("@{}" <> colDescriptors <> "@{}")
+ -- the @{} removes extra space at beginning and end
+ $$ capt
+ $$ (if all null heads then "\\toprule" else empty)
+ $$ headers
+ $$ endhead
+ $$ vcat rows'
+ $$ "\\bottomrule"
+ $$ "\\end{longtable}"
+
+toColDescriptor :: Alignment -> String
+toColDescriptor align =
+ case align of
+ AlignLeft -> "l"
+ AlignRight -> "r"
+ AlignCenter -> "c"
+ AlignDefault -> "l"
+
+blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc
+blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst
+
+tableRowToLaTeX :: PandocMonad m
+ => Bool
+ -> [Alignment]
+ -> [Double]
+ -> [[Block]]
+ -> LW m Doc
+tableRowToLaTeX header aligns widths cols = do
+ -- scale factor compensates for extra space between columns
+ -- so the whole table isn't larger than columnwidth
+ let scaleFactor = 0.97 ** fromIntegral (length aligns)
+ let isSimple [Plain _] = True
+ isSimple [Para _] = True
+ isSimple [] = True
+ isSimple _ = False
+ -- simple tables have to have simple cells:
+ let widths' = if not (all isSimple cols)
+ then replicate (length aligns)
+ (0.97 / fromIntegral (length aligns))
+ else map (scaleFactor *) widths
+ cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
+ return $ hsep (intersperse "&" cells) <> "\\tabularnewline"
+
+-- For simple latex tables (without minipages or parboxes),
+-- we need to go to some lengths to get line breaks working:
+-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
+fixLineBreaks :: Block -> Block
+fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils
+fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
+fixLineBreaks x = x
+
+fixLineBreaks' :: [Inline] -> [Inline]
+fixLineBreaks' ils = case splitBy (== LineBreak) ils of
+ [] -> []
+ [xs] -> xs
+ chunks -> RawInline "tex" "\\vtop{" :
+ concatMap tohbox chunks ++
+ [RawInline "tex" "}"]
+ where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++
+ [RawInline "tex" "}"]
+
+-- We also change display math to inline math, since display
+-- math breaks in simple tables.
+displayMathToInline :: Inline -> Inline
+displayMathToInline (Math DisplayMath x) = Math InlineMath x
+displayMathToInline x = x
+
+tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block])
+ -> LW m Doc
+tableCellToLaTeX _ (0, _, blocks) =
+ blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
+tableCellToLaTeX header (width, align, blocks) = do
+ modify $ \st -> st{ stInMinipage = True, stNotes = [] }
+ cellContents <- blockListToLaTeX blocks
+ notes <- gets stNotes
+ modify $ \st -> st{ stInMinipage = False, stNotes = [] }
+ let valign = text $ if header then "[b]" else "[t]"
+ let halign = case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright"
+ return $ ("\\begin{minipage}" <> valign <>
+ braces (text (printf "%.2f\\columnwidth" width)) <>
+ (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <>
+ "\\end{minipage}") $$
+ notesToLaTeX notes
+
+notesToLaTeX :: [Doc] -> Doc
+notesToLaTeX [] = empty
+notesToLaTeX ns = (case length ns of
+ n | n > 1 -> "\\addtocounter" <>
+ braces "footnote" <>
+ braces (text $ show $ 1 - n)
+ | otherwise -> empty)
+ $$
+ vcat (intersperse
+ ("\\addtocounter" <> braces "footnote" <> braces "1")
+ $ map (\x -> "\\footnotetext" <> braces x)
+ $ reverse ns)
+
+listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc
+listItemToLaTeX lst
+ -- we need to put some text before a header if it's the first
+ -- element in an item. This will look ugly in LaTeX regardless, but
+ -- this will keep the typesetter from throwing an error.
+ | ((Header _ _ _) :_) <- lst =
+ blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2)
+ | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) .
+ (nest 2)
+
+defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc
+defListItemToLaTeX (term, defs) = do
+ term' <- inlineListToLaTeX term
+ -- put braces around term if it contains an internal link,
+ -- since otherwise we get bad bracket interactions: \item[\hyperref[..]
+ let isInternalLink (Link _ _ ('#':_,_)) = True
+ isInternalLink _ = False
+ let term'' = if any isInternalLink term
+ then braces term'
+ else term'
+ def' <- liftM vsep $ mapM blockListToLaTeX defs
+ return $ case defs of
+ (((Header _ _ _) : _) : _) ->
+ "\\item" <> brackets term'' <> " ~ " $$ def'
+ _ ->
+ "\\item" <> brackets term'' $$ def'
+
+-- | Craft the section header, inserting the secton reference, if supplied.
+sectionHeader :: PandocMonad m
+ => Bool -- True for unnumbered
+ -> [Char]
+ -> Int
+ -> [Inline]
+ -> LW m Doc
+sectionHeader unnumbered ident level lst = do
+ txt <- inlineListToLaTeX lst
+ plain <- stringToLaTeX TextString $ concatMap stringify lst
+ let removeInvalidInline (Note _) = []
+ removeInvalidInline (Span (id', _, _) _) | not (null id') = []
+ removeInvalidInline (Image _ _ _) = []
+ removeInvalidInline x = [x]
+ let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst
+ txtNoNotes <- inlineListToLaTeX lstNoNotes
+ -- footnotes in sections don't work (except for starred variants)
+ -- unless you specify an optional argument:
+ -- \section[mysec]{mysec\footnote{blah}}
+ optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == []
+ then return empty
+ else do
+ return $ brackets txtNoNotes
+ let contents = if render Nothing txt == plain
+ then braces txt
+ else braces (text "\\texorpdfstring"
+ <> braces txt
+ <> braces (text plain))
+ book <- gets stBook
+ opts <- gets stOptions
+ let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault
+ then TopLevelChapter
+ else writerTopLevelDivision opts
+ beamer <- gets stBeamer
+ let level' = if beamer &&
+ topLevelDivision `elem` [TopLevelPart, TopLevelChapter]
+ -- beamer has parts but no chapters
+ then if level == 1 then -1 else level - 1
+ else case topLevelDivision of
+ TopLevelPart -> level - 2
+ TopLevelChapter -> level - 1
+ TopLevelSection -> level
+ TopLevelDefault -> level
+ let sectionType = case level' of
+ -1 -> "part"
+ 0 -> "chapter"
+ 1 -> "section"
+ 2 -> "subsection"
+ 3 -> "subsubsection"
+ 4 -> "paragraph"
+ 5 -> "subparagraph"
+ _ -> ""
+ inQuote <- gets stInQuote
+ let prefix = if inQuote && level' >= 4
+ then text "\\mbox{}%"
+ -- needed for \paragraph, \subparagraph in quote environment
+ -- see http://tex.stackexchange.com/questions/169830/
+ else empty
+ lab <- labelFor ident
+ let star = if unnumbered && level' < 4 then text "*" else empty
+ let stuffing = star <> optional <> contents
+ stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab
+ return $ if level' > 5
+ then txt
+ else prefix $$ stuffing'
+ $$ if unnumbered
+ then "\\addcontentsline{toc}" <>
+ braces (text sectionType) <>
+ braces txtNoNotes
+ else empty
+
+hypertarget :: PandocMonad m => String -> Doc -> LW m Doc
+hypertarget ident x = do
+ ref <- text `fmap` toLabel ident
+ internalLinks <- gets stInternalLinks
+ return $
+ if ident `elem` internalLinks
+ then text "\\hypertarget"
+ <> braces ref
+ <> braces x
+ else x
+
+labelFor :: PandocMonad m => String -> LW m Doc
+labelFor "" = return empty
+labelFor ident = do
+ ref <- text `fmap` toLabel ident
+ return $ text "\\label" <> braces ref
+
+-- | Convert list of inline elements to LaTeX.
+inlineListToLaTeX :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> LW m Doc
+inlineListToLaTeX lst =
+ mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst)
+ >>= return . hcat
+ -- nonbreaking spaces (~) in LaTeX don't work after line breaks,
+ -- so we turn nbsps after hard breaks to \hspace commands.
+ -- this is mostly used in verse.
+ where fixLineInitialSpaces [] = []
+ fixLineInitialSpaces (LineBreak : Str s@('\160':_) : xs) =
+ LineBreak : fixNbsps s ++ fixLineInitialSpaces xs
+ fixLineInitialSpaces (x:xs) = x : fixLineInitialSpaces xs
+ fixNbsps s = let (ys,zs) = span (=='\160') s
+ in replicate (length ys) hspace ++ [Str zs]
+ hspace = RawInline "latex" "\\hspace*{0.333em}"
+ -- linebreaks after blank lines cause problems:
+ fixBreaks [] = []
+ fixBreaks ys@(LineBreak : LineBreak : _) =
+ case span (== LineBreak) ys of
+ (lbs, rest) -> RawInline "latex"
+ ("\\\\[" ++ show (length lbs) ++
+ "\\baselineskip]") : fixBreaks rest
+ fixBreaks (y:ys) = y : fixBreaks ys
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted _ = False
+
+-- | Convert inline element to LaTeX
+inlineToLaTeX :: PandocMonad m
+ => Inline -- ^ Inline to convert
+ -> LW m Doc
+inlineToLaTeX (Span (id',classes,kvs) ils) = do
+ ref <- toLabel id'
+ let linkAnchor = if null id'
+ then empty
+ else "\\protect\\hypertarget" <> braces (text ref) <>
+ braces empty
+ let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
+ ["textnormal" | "csl-no-strong" `elem` classes ||
+ "csl-no-smallcaps" `elem` classes] ++
+ ["RL" | ("dir", "rtl") `elem` kvs] ++
+ ["LR" | ("dir", "ltr") `elem` kvs] ++
+ (case lookup "lang" kvs of
+ Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng
+ ops = if null o then "" else ("[" ++ o ++ "]")
+ in ["text" ++ l ++ ops]
+ Nothing -> [])
+ contents <- inlineListToLaTeX ils
+ return $ linkAnchor <>
+ if null cmds
+ then braces contents
+ else foldr inCmd contents cmds
+inlineToLaTeX (Emph lst) =
+ inlineListToLaTeX lst >>= return . inCmd "emph"
+inlineToLaTeX (Strong lst) =
+ inlineListToLaTeX lst >>= return . inCmd "textbf"
+inlineToLaTeX (Strikeout lst) = do
+ -- we need to protect VERB in an mbox or we get an error
+ -- see #1294
+ contents <- inlineListToLaTeX $ protectCode lst
+ modify $ \s -> s{ stStrikeout = True }
+ return $ inCmd "sout" contents
+inlineToLaTeX (Superscript lst) =
+ inlineListToLaTeX lst >>= return . inCmd "textsuperscript"
+inlineToLaTeX (Subscript lst) = do
+ inlineListToLaTeX lst >>= return . inCmd "textsubscript"
+inlineToLaTeX (SmallCaps lst) =
+ inlineListToLaTeX lst >>= return . inCmd "textsc"
+inlineToLaTeX (Cite cits lst) = do
+ st <- get
+ let opts = stOptions st
+ case writerCiteMethod opts of
+ Natbib -> citationsToNatbib cits
+ Biblatex -> citationsToBiblatex cits
+ _ -> inlineListToLaTeX lst
+
+inlineToLaTeX (Code (_,classes,_) str) = do
+ opts <- gets stOptions
+ inHeading <- gets stInHeading
+ case () of
+ _ | writerListings opts && not inHeading -> listingsCode
+ | isJust (writerHighlightStyle opts) && not (null classes)
+ -> highlightCode
+ | otherwise -> rawCode
+ where listingsCode = do
+ let listingsopt = case getListingsLanguage classes of
+ Just l -> "[language=" ++ mbBraced l ++ "]"
+ Nothing -> ""
+ inNote <- gets stInNote
+ when inNote $ modify $ \s -> s{ stVerbInNote = True }
+ let chr = case "!\"&'()*,-./:;?@_" \\ str of
+ (c:_) -> c
+ [] -> '!'
+ return $ text $ "\\lstinline" ++ listingsopt ++ [chr] ++ str ++ [chr]
+ highlightCode = do
+ case highlight formatLaTeXInline ("",classes,[]) str of
+ Nothing -> rawCode
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (text (T.unpack h))
+ rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}"))
+ $ stringToLaTeX CodeString str
+ where
+ escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c])
+inlineToLaTeX (Quoted qt lst) = do
+ contents <- inlineListToLaTeX lst
+ csquotes <- liftM stCsquotes get
+ opts <- gets stOptions
+ if csquotes
+ then return $ "\\enquote" <> braces contents
+ else do
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then "\\,"
+ else empty
+ let inner = s1 <> contents <> s2
+ return $ case qt of
+ DoubleQuote ->
+ if isEnabled Ext_smart opts
+ then text "``" <> inner <> text "''"
+ else char '\x201C' <> inner <> char '\x201D'
+ SingleQuote ->
+ if isEnabled Ext_smart opts
+ then char '`' <> inner <> char '\''
+ else char '\x2018' <> inner <> char '\x2019'
+inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str
+inlineToLaTeX (Math InlineMath str) =
+ return $ "\\(" <> text str <> "\\)"
+inlineToLaTeX (Math DisplayMath str) =
+ return $ "\\[" <> text str <> "\\]"
+inlineToLaTeX il@(RawInline f str)
+ | f == Format "latex" || f == Format "tex"
+ = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr
+inlineToLaTeX SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ case wrapText of
+ WrapAuto -> return space
+ WrapNone -> return space
+ WrapPreserve -> return cr
+inlineToLaTeX Space = return space
+inlineToLaTeX (Link _ txt ('#':ident, _)) = do
+ contents <- inlineListToLaTeX txt
+ lab <- toLabel ident
+ return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents
+inlineToLaTeX (Link _ txt (src, _)) =
+ case txt of
+ [Str x] | escapeURI x == src -> -- autolink
+ do modify $ \s -> s{ stUrl = True }
+ src' <- stringToLaTeX URLString (escapeURI src)
+ return $ text $ "\\url{" ++ src' ++ "}"
+ [Str x] | Just rest <- stripPrefix "mailto:" src,
+ escapeURI x == rest -> -- email autolink
+ do modify $ \s -> s{ stUrl = True }
+ src' <- stringToLaTeX URLString (escapeURI src)
+ contents <- inlineListToLaTeX txt
+ return $ "\\href" <> braces (text src') <>
+ braces ("\\nolinkurl" <> braces contents)
+ _ -> do contents <- inlineListToLaTeX txt
+ src' <- stringToLaTeX URLString (escapeURI src)
+ return $ text ("\\href{" ++ src' ++ "}{") <>
+ contents <> char '}'
+inlineToLaTeX (Image attr _ (source, _)) = do
+ modify $ \s -> s{ stGraphics = True }
+ opts <- gets stOptions
+ let showDim dir = let d = text (show dir) <> "="
+ in case (dimension dir attr) of
+ Just (Pixel a) ->
+ [d <> text (showInInch opts (Pixel a)) <> "in"]
+ Just (Percent a) ->
+ [d <> text (showFl (a / 100)) <> "\\textwidth"]
+ Just dim ->
+ [d <> text (show dim)]
+ Nothing ->
+ []
+ dimList = showDim Width ++ showDim Height
+ dims = if null dimList
+ then empty
+ else brackets $ cat (intersperse "," dimList)
+ source' = if isURI source
+ then source
+ else unEscapeString source
+ source'' <- stringToLaTeX URLString source'
+ inHeading <- gets stInHeading
+ return $
+ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <>
+ dims <> braces (text source'')
+inlineToLaTeX (Note contents) = do
+ inMinipage <- gets stInMinipage
+ modify (\s -> s{stInNote = True})
+ contents' <- blockListToLaTeX contents
+ modify (\s -> s {stInNote = False})
+ let optnl = case reverse contents of
+ (CodeBlock _ _ : _) -> cr
+ _ -> empty
+ let noteContents = nest 2 contents' <> optnl
+ beamer <- gets stBeamer
+ -- in beamer slides, display footnote from current overlay forward
+ let beamerMark = if beamer
+ then text "<.->"
+ else empty
+ modify $ \st -> st{ stNotes = noteContents : stNotes st }
+ return $
+ if inMinipage
+ then "\\footnotemark{}"
+ -- note: a \n before } needed when note ends with a Verbatim environment
+ else "\\footnote" <> beamerMark <> braces noteContents
+
+protectCode :: [Inline] -> [Inline]
+protectCode [] = []
+protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs
+protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs
+ where ltx = RawInline (Format "latex")
+protectCode (x : xs) = x : protectCode xs
+
+citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc
+citationsToNatbib (one:[])
+ = citeCommand c p s k
+ where
+ Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ }
+ = one
+ c = case m of
+ AuthorInText -> "citet"
+ SuppressAuthor -> "citeyearpar"
+ NormalCitation -> "citep"
+
+citationsToNatbib cits
+ | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
+ = citeCommand "citep" p s ks
+ where
+ noPrefix = all (null . citationPrefix)
+ noSuffix = all (null . citationSuffix)
+ ismode m = all (((==) m) . citationMode)
+ p = citationPrefix $ head $ cits
+ s = citationSuffix $ last $ cits
+ ks = intercalate ", " $ map citationId cits
+
+citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
+ author <- citeCommand "citeauthor" [] [] (citationId c)
+ cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs)
+ return $ author <+> cits
+
+citationsToNatbib cits = do
+ cits' <- mapM convertOne cits
+ return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
+ where
+ combineTwo a b | isEmpty a = b
+ | otherwise = a <> text "; " <> b
+ convertOne Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ }
+ = case m of
+ AuthorInText -> citeCommand "citealt" p s k
+ SuppressAuthor -> citeCommand "citeyear" p s k
+ NormalCitation -> citeCommand "citealp" p s k
+
+citeCommand :: PandocMonad m
+ => String -> [Inline] -> [Inline] -> String -> LW m Doc
+citeCommand c p s k = do
+ args <- citeArguments p s k
+ return $ text ("\\" ++ c) <> args
+
+citeArguments :: PandocMonad m
+ => [Inline] -> [Inline] -> String -> LW m Doc
+citeArguments p s k = do
+ let s' = case s of
+ (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r
+ (Str (x:xs) : r) | isPunctuation x -> Str xs : r
+ _ -> s
+ pdoc <- inlineListToLaTeX p
+ sdoc <- inlineListToLaTeX s'
+ let optargs = case (isEmpty pdoc, isEmpty sdoc) of
+ (True, True ) -> empty
+ (True, False) -> brackets sdoc
+ (_ , _ ) -> brackets pdoc <> brackets sdoc
+ return $ optargs <> braces (text k)
+
+citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc
+citationsToBiblatex (one:[])
+ = citeCommand cmd p s k
+ where
+ Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ } = one
+ cmd = case m of
+ SuppressAuthor -> "autocite*"
+ AuthorInText -> "textcite"
+ NormalCitation -> "autocite"
+
+citationsToBiblatex (c:cs) = do
+ args <- mapM convertOne (c:cs)
+ return $ text cmd <> foldl' (<>) empty args
+ where
+ cmd = case citationMode c of
+ SuppressAuthor -> "\\autocites*"
+ AuthorInText -> "\\textcites"
+ NormalCitation -> "\\autocites"
+ convertOne Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ }
+ = citeArguments p s k
+
+citationsToBiblatex _ = return empty
+
+-- Determine listings language from list of class attributes.
+getListingsLanguage :: [String] -> Maybe String
+getListingsLanguage [] = Nothing
+getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
+
+mbBraced :: String -> String
+mbBraced x = if not (all isAlphaNum x)
+ then "{" <> x <> "}"
+ else x
+
+-- Extract a key from divs and spans
+extract :: String -> Block -> [String]
+extract key (Div attr _) = lookKey key attr
+extract key (Plain ils) = concatMap (extractInline key) ils
+extract key (Para ils) = concatMap (extractInline key) ils
+extract key (Header _ _ ils) = concatMap (extractInline key) ils
+extract _ _ = []
+
+-- Extract a key from spans
+extractInline :: String -> Inline -> [String]
+extractInline key (Span attr _) = lookKey key attr
+extractInline _ _ = []
+
+-- Look up a key in an attribute and give a list of its values
+lookKey :: String -> Attr -> [String]
+lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
+
+-- In environments \Arabic instead of \arabic is used
+toPolyglossiaEnv :: String -> (String, String)
+toPolyglossiaEnv l =
+ case toPolyglossia $ (splitBy (=='-')) l of
+ ("arabic", o) -> ("Arabic", o)
+ x -> x
+
+-- Takes a list of the constituents of a BCP 47 language code and
+-- converts it to a Polyglossia (language, options) tuple
+-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
+toPolyglossia :: [String] -> (String, String)
+toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria")
+toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq")
+toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq")
+toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq")
+toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya")
+toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco")
+toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania")
+toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq")
+toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq")
+toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia")
+toPolyglossia ("de":"1901":_) = ("german", "spelling=old")
+toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old")
+toPolyglossia ("de":"AT":_) = ("german", "variant=austrian")
+toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old")
+toPolyglossia ("de":"CH":_) = ("german", "variant=swiss")
+toPolyglossia ("de":_) = ("german", "")
+toPolyglossia ("dsb":_) = ("lsorbian", "")
+toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly")
+toPolyglossia ("en":"AU":_) = ("english", "variant=australian")
+toPolyglossia ("en":"CA":_) = ("english", "variant=canadian")
+toPolyglossia ("en":"GB":_) = ("english", "variant=british")
+toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand")
+toPolyglossia ("en":"UK":_) = ("english", "variant=british")
+toPolyglossia ("en":"US":_) = ("english", "variant=american")
+toPolyglossia ("grc":_) = ("greek", "variant=ancient")
+toPolyglossia ("hsb":_) = ("usorbian", "")
+toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic")
+toPolyglossia ("sl":_) = ("slovenian", "")
+toPolyglossia x = (commonFromBcp47 x, "")
+
+-- Takes a list of the constituents of a BCP 47 language code and
+-- converts it to a Babel language string.
+-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
+-- List of supported languages (slightly outdated):
+-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
+toBabel :: [String] -> String
+toBabel ("de":"1901":_) = "german"
+toBabel ("de":"AT":"1901":_) = "austrian"
+toBabel ("de":"AT":_) = "naustrian"
+toBabel ("de":"CH":"1901":_) = "swissgerman"
+toBabel ("de":"CH":_) = "nswissgerman"
+toBabel ("de":_) = "ngerman"
+toBabel ("dsb":_) = "lowersorbian"
+toBabel ("el":"polyton":_) = "polutonikogreek"
+toBabel ("en":"AU":_) = "australian"
+toBabel ("en":"CA":_) = "canadian"
+toBabel ("en":"GB":_) = "british"
+toBabel ("en":"NZ":_) = "newzealand"
+toBabel ("en":"UK":_) = "british"
+toBabel ("en":"US":_) = "american"
+toBabel ("fr":"CA":_) = "canadien"
+toBabel ("fra":"aca":_) = "acadian"
+toBabel ("grc":_) = "polutonikogreek"
+toBabel ("hsb":_) = "uppersorbian"
+toBabel ("la":"x":"classic":_) = "classiclatin"
+toBabel ("sl":_) = "slovene"
+toBabel x = commonFromBcp47 x
+
+-- Takes a list of the constituents of a BCP 47 language code
+-- and converts it to a string shared by Babel and Polyglossia.
+-- https://tools.ietf.org/html/bcp47#section-2.1
+commonFromBcp47 :: [String] -> String
+commonFromBcp47 [] = ""
+commonFromBcp47 ("pt":"BR":_) = "brazil"
+-- Note: documentation says "brazilian" works too, but it doesn't seem to work
+-- on some systems. See #2953.
+commonFromBcp47 ("sr":"Cyrl":_) = "serbianc"
+commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin"
+commonFromBcp47 x = fromIso $ head x
+ where
+ fromIso "af" = "afrikaans"
+ fromIso "am" = "amharic"
+ fromIso "ar" = "arabic"
+ fromIso "as" = "assamese"
+ fromIso "ast" = "asturian"
+ fromIso "bg" = "bulgarian"
+ fromIso "bn" = "bengali"
+ fromIso "bo" = "tibetan"
+ fromIso "br" = "breton"
+ fromIso "ca" = "catalan"
+ fromIso "cy" = "welsh"
+ fromIso "cs" = "czech"
+ fromIso "cop" = "coptic"
+ fromIso "da" = "danish"
+ fromIso "dv" = "divehi"
+ fromIso "el" = "greek"
+ fromIso "en" = "english"
+ fromIso "eo" = "esperanto"
+ fromIso "es" = "spanish"
+ fromIso "et" = "estonian"
+ fromIso "eu" = "basque"
+ fromIso "fa" = "farsi"
+ fromIso "fi" = "finnish"
+ fromIso "fr" = "french"
+ fromIso "fur" = "friulan"
+ fromIso "ga" = "irish"
+ fromIso "gd" = "scottish"
+ fromIso "gez" = "ethiopic"
+ fromIso "gl" = "galician"
+ fromIso "he" = "hebrew"
+ fromIso "hi" = "hindi"
+ fromIso "hr" = "croatian"
+ fromIso "hu" = "magyar"
+ fromIso "hy" = "armenian"
+ fromIso "ia" = "interlingua"
+ fromIso "id" = "indonesian"
+ fromIso "ie" = "interlingua"
+ fromIso "is" = "icelandic"
+ fromIso "it" = "italian"
+ fromIso "jp" = "japanese"
+ fromIso "km" = "khmer"
+ fromIso "kmr" = "kurmanji"
+ fromIso "kn" = "kannada"
+ fromIso "ko" = "korean"
+ fromIso "la" = "latin"
+ fromIso "lo" = "lao"
+ fromIso "lt" = "lithuanian"
+ fromIso "lv" = "latvian"
+ fromIso "ml" = "malayalam"
+ fromIso "mn" = "mongolian"
+ fromIso "mr" = "marathi"
+ fromIso "nb" = "norsk"
+ fromIso "nl" = "dutch"
+ fromIso "nn" = "nynorsk"
+ fromIso "no" = "norsk"
+ fromIso "nqo" = "nko"
+ fromIso "oc" = "occitan"
+ fromIso "pa" = "panjabi"
+ fromIso "pl" = "polish"
+ fromIso "pms" = "piedmontese"
+ fromIso "pt" = "portuguese"
+ fromIso "rm" = "romansh"
+ fromIso "ro" = "romanian"
+ fromIso "ru" = "russian"
+ fromIso "sa" = "sanskrit"
+ fromIso "se" = "samin"
+ fromIso "sk" = "slovak"
+ fromIso "sq" = "albanian"
+ fromIso "sr" = "serbian"
+ fromIso "sv" = "swedish"
+ fromIso "syr" = "syriac"
+ fromIso "ta" = "tamil"
+ fromIso "te" = "telugu"
+ fromIso "th" = "thai"
+ fromIso "ti" = "ethiopic"
+ fromIso "tk" = "turkmen"
+ fromIso "tr" = "turkish"
+ fromIso "uk" = "ukrainian"
+ fromIso "ur" = "urdu"
+ fromIso "vi" = "vietnamese"
+ fromIso _ = ""
+
+pDocumentOptions :: P.Parsec String () [String]
+pDocumentOptions = do
+ P.char '['
+ opts <- P.sepBy
+ (P.many $ P.spaces *> P.noneOf (" ,]" :: String) <* P.spaces)
+ (P.char ',')
+ P.char ']'
+ return opts
+
+pDocumentClass :: P.Parsec String () String
+pDocumentClass =
+ do P.skipMany (P.satisfy (/='\\'))
+ P.string "\\documentclass"
+ classOptions <- pDocumentOptions <|> return []
+ if ("article" :: String) `elem` classOptions
+ then return "article"
+ else do P.skipMany (P.satisfy (/='{'))
+ P.char '{'
+ P.manyTill P.letter (P.char '}')
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
new file mode 100644
index 000000000..f33acef32
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -0,0 +1,381 @@
+{-
+Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Man
+ Copyright : Copyright (C) 2007-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to groff man page format.
+
+-}
+module Text.Pandoc.Writers.Man ( writeMan) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Writers.Math
+import Text.Printf ( printf )
+import Data.List ( stripPrefix, intersperse, intercalate )
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Pretty
+import Text.Pandoc.Builder (deleteMeta)
+import Control.Monad.State
+import Text.Pandoc.Error
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+type Notes = [[Block]]
+data WriterState = WriterState { stNotes :: Notes
+ , stHasTables :: Bool }
+
+-- | Convert Pandoc to Man.
+writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeMan opts document = evalStateT (pandocToMan opts document) (WriterState [] False)
+
+-- | Return groff man representation of document.
+pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m String
+pandocToMan opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' = render colwidth
+ titleText <- inlineListToMan opts $ docTitle meta
+ let title' = render' titleText
+ let setFieldsFromTitle =
+ case break (== ' ') title' of
+ (cmdName, rest) -> case break (=='(') cmdName of
+ (xs, '(':ys) | not (null ys) &&
+ last ys == ')' ->
+ defField "title" xs .
+ defField "section" (init ys) .
+ case splitBy (=='|') rest of
+ (ft:hds) ->
+ defField "footer" (trim ft) .
+ defField "header"
+ (trim $ concat hds)
+ [] -> id
+ _ -> defField "title" title'
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToMan opts)
+ (fmap (render colwidth) . inlineListToMan opts)
+ $ deleteMeta "title" meta
+ body <- blockListToMan opts blocks
+ notes <- liftM stNotes get
+ notes' <- notesToMan opts (reverse notes)
+ let main = render' $ body $$ notes' $$ text ""
+ hasTables <- liftM stHasTables get
+ let context = defField "body" main
+ $ setFieldsFromTitle
+ $ defField "has-tables" hasTables
+ $ defField "hyphenate" True
+ $ defField "pandoc-version" pandocVersion
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Return man representation of notes.
+notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc
+notesToMan opts notes =
+ if null notes
+ then return empty
+ else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ return . (text ".SH NOTES" $$) . vcat
+
+-- | Return man representation of a note.
+noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc
+noteToMan opts num note = do
+ contents <- blockListToMan opts note
+ let marker = cr <> text ".SS " <> brackets (text (show num))
+ return $ marker $$ contents
+
+-- | Association list of characters to escape.
+manEscapes :: [(Char, String)]
+manEscapes = [ ('\160', "\\ ")
+ , ('\'', "\\[aq]")
+ , ('’', "'")
+ , ('\x2014', "\\[em]")
+ , ('\x2013', "\\[en]")
+ , ('\x2026', "\\&...")
+ ] ++ backslashEscapes "-@\\"
+
+-- | Escape special characters for Man.
+escapeString :: String -> String
+escapeString = escapeStringUsing manEscapes
+
+-- | Escape a literal (code) section for Man.
+escapeCode :: String -> String
+escapeCode = concat . intersperse "\n" . map escapeLine . lines where
+ escapeLine codeline =
+ case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of
+ a@('.':_) -> "\\&" ++ a
+ b -> b
+
+-- We split inline lists into sentences, and print one sentence per
+-- line. groff/troff treats the line-ending period differently.
+-- See http://code.google.com/p/pandoc/issues/detail?id=148.
+
+-- | Returns the first sentence in a list of inlines, and the rest.
+breakSentence :: [Inline] -> ([Inline], [Inline])
+breakSentence [] = ([],[])
+breakSentence xs =
+ let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
+ isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
+ isSentenceEndInline (LineBreak) = True
+ isSentenceEndInline _ = False
+ (as, bs) = break isSentenceEndInline xs
+ in case bs of
+ [] -> (as, [])
+ [c] -> (as ++ [c], [])
+ (c:Space:cs) -> (as ++ [c], cs)
+ (c:SoftBreak:cs) -> (as ++ [c], cs)
+ (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
+ (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
+ (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
+ (c:cs) -> (as ++ [c] ++ ds, es)
+ where (ds, es) = breakSentence cs
+
+-- | Split a list of inlines into sentences.
+splitSentences :: [Inline] -> [[Inline]]
+splitSentences xs =
+ let (sent, rest) = breakSentence xs
+ in if null rest then [sent] else sent : splitSentences rest
+
+-- | Convert Pandoc block element to man.
+blockToMan :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> StateT WriterState m Doc
+blockToMan _ Null = return empty
+blockToMan opts (Div _ bs) = blockListToMan opts bs
+blockToMan opts (Plain inlines) =
+ liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
+blockToMan opts (Para inlines) = do
+ contents <- liftM vcat $ mapM (inlineListToMan opts) $
+ splitSentences inlines
+ return $ text ".PP" $$ contents
+blockToMan opts (LineBlock lns) =
+ blockToMan opts $ linesToPara lns
+blockToMan _ b@(RawBlock f str)
+ | f == Format "man" = return $ text str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
+blockToMan opts (Header level _ inlines) = do
+ contents <- inlineListToMan opts inlines
+ let heading = case level of
+ 1 -> ".SH "
+ _ -> ".SS "
+ return $ text heading <> contents
+blockToMan _ (CodeBlock _ str) = return $
+ text ".IP" $$
+ text ".nf" $$
+ text "\\f[C]" $$
+ text (escapeCode str) $$
+ text "\\f[]" $$
+ text ".fi"
+blockToMan opts (BlockQuote blocks) = do
+ contents <- blockListToMan opts blocks
+ return $ text ".RS" $$ contents $$ text ".RE"
+blockToMan opts (Table caption alignments widths headers rows) =
+ let aligncode AlignLeft = "l"
+ aligncode AlignRight = "r"
+ aligncode AlignCenter = "c"
+ aligncode AlignDefault = "l"
+ in do
+ caption' <- inlineListToMan opts caption
+ modify $ \st -> st{ stHasTables = True }
+ let iwidths = if all (== 0) widths
+ then repeat ""
+ else map (printf "w(%0.1fn)" . (70 *)) widths
+ -- 78n default width - 8n indent = 70n
+ let coldescriptions = text $ intercalate " "
+ (zipWith (\align width -> aligncode align ++ width)
+ alignments iwidths) ++ "."
+ colheadings <- mapM (blockListToMan opts) headers
+ let makeRow cols = text "T{" $$
+ (vcat $ intersperse (text "T}@T{") cols) $$
+ text "T}"
+ let colheadings' = if all null headers
+ then empty
+ else makeRow colheadings $$ char '_'
+ body <- mapM (\row -> do
+ cols <- mapM (blockListToMan opts) row
+ return $ makeRow cols) rows
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "tab(@);" $$ coldescriptions $$
+ colheadings' $$ vcat body $$ text ".TE"
+
+blockToMan opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMan opts) items
+ return (vcat contents)
+blockToMan opts (OrderedList attribs items) = do
+ let markers = take (length items) $ orderedListMarkers attribs
+ let indent = 1 + (maximum $ map length markers)
+ contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
+ zip markers items
+ return (vcat contents)
+blockToMan opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMan opts) items
+ return (vcat contents)
+
+-- | Convert bullet list item (list of blocks) to man.
+bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc
+bulletListItemToMan _ [] = return empty
+bulletListItemToMan opts ((Para first):rest) =
+ bulletListItemToMan opts ((Plain first):rest)
+bulletListItemToMan opts ((Plain first):rest) = do
+ first' <- blockToMan opts (Plain first)
+ rest' <- blockListToMan opts rest
+ let first'' = text ".IP \\[bu] 2" $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 2" $$ rest' $$ text ".RE"
+ return (first'' $$ rest'')
+bulletListItemToMan opts (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
+
+-- | Convert ordered list item (a list of blocks) to man.
+orderedListItemToMan :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> StateT WriterState m Doc
+orderedListItemToMan _ _ _ [] = return empty
+orderedListItemToMan opts num indent ((Para first):rest) =
+ orderedListItemToMan opts num indent ((Plain first):rest)
+orderedListItemToMan opts num indent (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ let num' = printf ("%" ++ show (indent - 1) ++ "s") num
+ let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 4" $$ rest' $$ text ".RE"
+ return $ first'' $$ rest''
+
+-- | Convert definition list item (label, list of blocks) to man.
+definitionListItemToMan :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> StateT WriterState m Doc
+definitionListItemToMan opts (label, defs) = do
+ labelText <- inlineListToMan opts label
+ contents <- if null defs
+ then return empty
+ else liftM vcat $ forM defs $ \blocks -> do
+ (first, rest) <- case blocks of
+ ((Para x):y) -> return (Plain x,y)
+ (x:y) -> return (x,y)
+ [] -> throwError $ PandocSomeError "blocks is null"
+ rest' <- liftM vcat $
+ mapM (\item -> blockToMan opts item) rest
+ first' <- blockToMan opts first
+ return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents
+
+-- | Convert list of Pandoc block elements to man.
+blockListToMan :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
+blockListToMan opts blocks =
+ mapM (blockToMan opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to man.
+inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc
+inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
+
+-- | Convert Pandoc inline element to man.
+inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc
+inlineToMan opts (Span _ ils) = inlineListToMan opts ils
+inlineToMan opts (Emph lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[I]" <> contents <> text "\\f[]"
+inlineToMan opts (Strong lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[B]" <> contents <> text "\\f[]"
+inlineToMan opts (Strikeout lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToMan opts (Superscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '^' <> contents <> char '^'
+inlineToMan opts (Subscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '~' <> contents <> char '~'
+inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported
+inlineToMan opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '`' <> contents <> char '\''
+inlineToMan opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\[lq]" <> contents <> text "\\[rq]"
+inlineToMan opts (Cite _ lst) =
+ inlineListToMan opts lst
+inlineToMan _ (Code _ str) =
+ return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
+inlineToMan _ (Str str@('.':_)) =
+ return $ afterBreak "\\&" <> text (escapeString str)
+inlineToMan _ (Str str) = return $ text $ escapeString str
+inlineToMan opts (Math InlineMath str) =
+ lift (texMathToInlines InlineMath str) >>= inlineListToMan opts
+inlineToMan opts (Math DisplayMath str) = do
+ contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts
+ return $ cr <> text ".RS" $$ contents $$ text ".RE"
+inlineToMan _ il@(RawInline f str)
+ | f == Format "man" = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToMan _ LineBreak = return $
+ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
+inlineToMan _ SoftBreak = return space
+inlineToMan _ Space = return space
+inlineToMan opts (Link _ txt (src, _)) = do
+ linktext <- inlineListToMan opts txt
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
+ return $ case txt of
+ [Str s]
+ | escapeURI s == srcSuffix ->
+ char '<' <> text srcSuffix <> char '>'
+ _ -> linktext <> text " (" <> text src <> char ')'
+inlineToMan opts (Image attr alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMan opts (Link attr txt (source, tit))
+ return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
+inlineToMan _ (Note contents) = do
+ -- add to notes in state
+ modify $ \st -> st{ stNotes = contents : stNotes st }
+ notes <- liftM stNotes get
+ let ref = show $ (length notes)
+ return $ char '[' <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
new file mode 100644
index 000000000..a97c32542
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -0,0 +1,1147 @@
+{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Markdown
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to markdown-formatted plain text.
+
+Markdown: <http://daringfireball.net/projects/markdown/>
+-}
+module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
+import Data.Maybe (fromMaybe)
+import Data.Monoid (Any(..))
+import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy )
+import Data.Char ( isSpace, isPunctuation, ord, chr )
+import Data.Ord ( comparing )
+import Text.Pandoc.Pretty
+import Control.Monad.Reader
+import Control.Monad.State
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
+import Network.URI (isURI)
+import Data.Default
+import Data.Yaml (Value(Object,String,Array,Bool,Number))
+import qualified Data.HashMap.Strict as H
+import qualified Data.Vector as V
+import qualified Data.Text as T
+import qualified Data.Set as Set
+import Network.HTTP ( urlEncode )
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+type Notes = [[Block]]
+type Ref = ([Inline], Target, Attr)
+type Refs = [Ref]
+
+type MD m = ReaderT WriterEnv (StateT WriterState m)
+
+evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
+evalMD md env st = evalStateT (runReaderT md env) st
+
+data WriterEnv = WriterEnv { envInList :: Bool
+ , envPlain :: Bool
+ , envRefShortcutable :: Bool
+ , envBlockLevel :: Int
+ , envEscapeSpaces :: Bool
+ }
+
+instance Default WriterEnv
+ where def = WriterEnv { envInList = False
+ , envPlain = False
+ , envRefShortcutable = True
+ , envBlockLevel = 0
+ , envEscapeSpaces = False
+ }
+
+data WriterState = WriterState { stNotes :: Notes
+ , stRefs :: Refs
+ , stIds :: Set.Set String
+ , stNoteNum :: Int
+ }
+
+instance Default WriterState
+ where def = WriterState{ stNotes = []
+ , stRefs = []
+ , stIds = Set.empty
+ , stNoteNum = 1
+ }
+
+-- | Convert Pandoc to Markdown.
+writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeMarkdown opts document =
+ evalMD (pandocToMarkdown opts{
+ writerWrapText = if isEnabled Ext_hard_line_breaks opts
+ then WrapNone
+ else writerWrapText opts }
+ document) def def
+
+-- | Convert Pandoc to plain text (like markdown, but without links,
+-- pictures, or inline formatting).
+writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writePlain opts document =
+ evalMD (pandocToMarkdown opts document) def{ envPlain = True } def
+
+pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc
+pandocTitleBlock tit auths dat =
+ hang 2 (text "% ") tit <> cr <>
+ hang 2 (text "% ") (vcat $ map nowrap auths) <> cr <>
+ hang 2 (text "% ") dat <> cr
+
+mmdTitleBlock :: Value -> Doc
+mmdTitleBlock (Object hashmap) =
+ vcat $ map go $ sortBy (comparing fst) $ H.toList hashmap
+ where go (k,v) =
+ case (text (T.unpack k), v) of
+ (k', Array vec)
+ | V.null vec -> empty
+ | otherwise -> k' <> ":" <> space <>
+ hcat (intersperse "; "
+ (map fromstr $ V.toList vec))
+ (_, String "") -> empty
+ (k', x) -> k' <> ":" <> space <> nest 2 (fromstr x)
+ fromstr (String s) = text (removeBlankLines $ T.unpack s)
+ fromstr (Bool b) = text (show b)
+ fromstr (Number n) = text (show n)
+ fromstr _ = empty
+ -- blank lines not allowed in MMD metadata - we replace with .
+ removeBlankLines = trimr . unlines . map (\x ->
+ if all isSpace x then "." else x) . lines
+mmdTitleBlock _ = empty
+
+plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc
+plainTitleBlock tit auths dat =
+ tit <> cr <>
+ (hcat (intersperse (text "; ") auths)) <> cr <>
+ dat <> cr
+
+yamlMetadataBlock :: Value -> Doc
+yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "---"
+
+jsonToYaml :: Value -> Doc
+jsonToYaml (Object hashmap) =
+ vcat $ map (\(k,v) ->
+ case (text (T.unpack k), v, jsonToYaml v) of
+ (k', Array vec, x)
+ | V.null vec -> empty
+ | otherwise -> (k' <> ":") $$ x
+ (k', Object _, x) -> (k' <> ":") $$ nest 2 x
+ (_, String "", _) -> empty
+ (k', _, x) | k == "meta-json" -> empty
+ | otherwise -> k' <> ":" <> space <> hang 2 "" x)
+ $ sortBy (comparing fst) $ H.toList hashmap
+jsonToYaml (Array vec) =
+ vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec
+jsonToYaml (String "") = empty
+jsonToYaml (String s) =
+ case T.unpack s of
+ x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x
+ | not (any isPunctuation x) -> text x
+ | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'"
+jsonToYaml (Bool b) = text $ show b
+jsonToYaml (Number n) = text $ show n
+jsonToYaml _ = empty
+
+-- | Return markdown representation of document.
+pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String
+pandocToMarkdown opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ isPlain <- asks envPlain
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToMarkdown opts)
+ (fmap (render colwidth) . inlineListToMarkdown opts)
+ meta
+ let title' = maybe empty text $ getField "title" metadata
+ let authors' = maybe [] (map text) $ getField "author" metadata
+ let date' = maybe empty text $ getField "date" metadata
+ let titleblock = case writerTemplate opts of
+ Just _ | isPlain ->
+ plainTitleBlock title' authors' date'
+ | isEnabled Ext_yaml_metadata_block opts ->
+ yamlMetadataBlock metadata
+ | isEnabled Ext_pandoc_title_block opts ->
+ pandocTitleBlock title' authors' date'
+ | isEnabled Ext_mmd_title_block opts ->
+ mmdTitleBlock metadata
+ | otherwise -> empty
+ Nothing -> empty
+ let headerBlocks = filter isHeaderBlock blocks
+ toc <- if writerTableOfContents opts
+ then tableOfContents opts headerBlocks
+ else return empty
+ -- Strip off final 'references' header if markdown citations enabled
+ let blocks' = if isEnabled Ext_citations opts
+ then case reverse blocks of
+ (Div (_,["references"],_) _):xs -> reverse xs
+ _ -> blocks
+ else blocks
+ body <- blockListToMarkdown opts blocks'
+ notesAndRefs' <- notesAndRefs opts
+ let render' :: Doc -> String
+ render' = render colwidth
+ let main = render' $ body <> notesAndRefs'
+ let context = defField "toc" (render' toc)
+ $ defField "body" main
+ $ (if isNullMeta meta
+ then id
+ else defField "titleblock" (render' titleblock))
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Return markdown representation of reference key table.
+refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc
+refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: PandocMonad m
+ => WriterOptions
+ -> Ref
+ -> MD m Doc
+keyToMarkdown opts (label, (src, tit), attr) = do
+ label' <- inlineListToMarkdown opts label
+ let tit' = if null tit
+ then empty
+ else space <> "\"" <> text tit <> "\""
+ return $ nest 2 $ hang 2
+ ("[" <> label' <> "]:" <> space) (text src <> tit')
+ <> linkAttributes opts attr
+
+-- | Return markdown representation of notes.
+notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc
+notesToMarkdown opts notes = do
+ n <- gets stNoteNum
+ notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes)
+ modify $ \st -> st { stNoteNum = stNoteNum st + length notes }
+ return $ vsep notes'
+
+-- | Return markdown representation of a note.
+noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc
+noteToMarkdown opts num blocks = do
+ contents <- blockListToMarkdown opts blocks
+ let num' = text $ writerIdentifierPrefix opts ++ show num
+ let marker = if isEnabled Ext_footnotes opts
+ then text "[^" <> num' <> text "]:"
+ else text "[" <> num' <> text "]"
+ let markerSize = 4 + offset num'
+ let spacer = case writerTabStop opts - markerSize of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ return $ if isEnabled Ext_footnotes opts
+ then hang (writerTabStop opts) (marker <> spacer) contents
+ else marker <> spacer <> contents
+
+-- | Escape special characters for Markdown.
+escapeString :: WriterOptions -> String -> String
+escapeString _ [] = []
+escapeString opts (c:cs) =
+ case c of
+ '<' -> "&lt;" ++ escapeString opts cs
+ '>' -> "&gt;" ++ escapeString opts cs
+ _ | c `elem` ['\\','`','*','_','[',']','#'] ->
+ '\\':c:escapeString opts cs
+ '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs
+ '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs
+ '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs
+ '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
+ '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
+ '-' | isEnabled Ext_smart opts ->
+ case cs of
+ '-':_ -> '\\':'-':escapeString opts cs
+ _ -> '-':escapeString opts cs
+ '.' | isEnabled Ext_smart opts ->
+ case cs of
+ '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest
+ _ -> '.':escapeString opts cs
+ _ -> c : escapeString opts cs
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> m Doc
+tableOfContents opts headers =
+ let contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers
+ in evalMD (blockToMarkdown opts contents) def def
+
+-- | Converts an Element to a list item for a table of contents,
+elementToListItem :: WriterOptions -> Element -> [Block]
+elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs)
+ = Plain headerLink :
+ [ BulletList (map (elementToListItem opts) subsecs) |
+ not (null subsecs) && lev < writerTOCDepth opts ]
+ where headerLink = if null ident
+ then walk deNote headerText
+ else [Link nullAttr (walk deNote headerText)
+ ('#':ident, "")]
+elementToListItem _ (Blk _) = []
+
+attrsToMarkdown :: Attr -> Doc
+attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
+ where attribId = case attribs of
+ ([],_,_) -> empty
+ (i,_,_) -> "#" <> text i
+ attribClasses = case attribs of
+ (_,[],_) -> empty
+ (_,cs,_) -> hsep $
+ map (text . ('.':))
+ cs
+ attribKeys = case attribs of
+ (_,_,[]) -> empty
+ (_,_,ks) -> hsep $
+ map (\(k,v) -> text k
+ <> "=\"" <> text v <> "\"") ks
+
+linkAttributes :: WriterOptions -> Attr -> Doc
+linkAttributes opts attr =
+ if isEnabled Ext_link_attributes opts && attr /= nullAttr
+ then attrsToMarkdown attr
+ else empty
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: Parser [Char] ParserState Char
+olMarker = do (start, style', delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style' == UpperAlpha || (style' == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case runParser olMarker defaultParserState "para start" (take 10 str) of
+ Left _ -> False
+ Right _ -> True
+
+notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc
+notesAndRefs opts = do
+ notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts
+ modify $ \s -> s { stNotes = [] }
+ refs' <- reverse <$> gets stRefs >>= refsToMarkdown opts
+ modify $ \s -> s { stRefs = [] }
+
+ let endSpacing =
+ if | writerReferenceLocation opts == EndOfDocument -> empty
+ | isEmpty notes' && isEmpty refs' -> empty
+ | otherwise -> blankline
+
+ return $
+ (if isEmpty notes' then empty else blankline <> notes') <>
+ (if isEmpty refs' then empty else blankline <> refs') <>
+ endSpacing
+
+-- | Convert Pandoc block element to markdown.
+blockToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> MD m Doc
+blockToMarkdown opts blk =
+ local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $
+ do doc <- blockToMarkdown' opts blk
+ blkLevel <- asks envBlockLevel
+ if writerReferenceLocation opts == EndOfBlock && blkLevel == 1
+ then notesAndRefs opts >>= (\d -> return $ doc <> d)
+ else return doc
+
+blockToMarkdown' :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> MD m Doc
+blockToMarkdown' _ Null = return empty
+blockToMarkdown' opts (Div attrs ils) = do
+ contents <- blockListToMarkdown opts ils
+ return $ if isEnabled Ext_raw_html opts &&
+ isEnabled Ext_markdown_in_html_blocks opts
+ then tagWithAttrs "div" attrs <> blankline <>
+ contents <> blankline <> "</div>" <> blankline
+ else contents <> blankline
+blockToMarkdown' opts (Plain inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ -- escape if para starts with ordered list marker
+ isPlain <- asks envPlain
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let rendered = render colwidth contents
+ let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs
+ | otherwise = x : escapeDelimiter xs
+ escapeDelimiter [] = []
+ let contents' = if isEnabled Ext_all_symbols_escapable opts &&
+ not isPlain && beginsWithOrderedListMarker rendered
+ then text $ escapeDelimiter rendered
+ else contents
+ return $ contents' <> cr
+-- title beginning with fig: indicates figure
+blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
+ blockToMarkdown opts (Para [Image attr alt (src,tit)])
+blockToMarkdown' opts (Para inlines) =
+ (<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
+blockToMarkdown' opts (LineBlock lns) =
+ if isEnabled Ext_line_blocks opts
+ then do
+ mdLines <- mapM (inlineListToMarkdown opts) lns
+ return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline
+ else blockToMarkdown opts $ linesToPara lns
+blockToMarkdown' opts b@(RawBlock f str)
+ | f == "markdown" = return $ text str <> text "\n"
+ | f == "html" && isEnabled Ext_raw_html opts = do
+ plain <- asks envPlain
+ return $ if plain
+ then empty
+ else if isEnabled Ext_markdown_attribute opts
+ then text (addMarkdownAttribute str) <> text "\n"
+ else text str <> text "\n"
+ | f `elem` ["latex", "tex"] && isEnabled Ext_raw_tex opts = do
+ plain <- asks envPlain
+ return $ if plain
+ then empty
+ else text str <> text "\n"
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+blockToMarkdown' opts HorizontalRule = do
+ return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline
+blockToMarkdown' opts (Header level attr inlines) = do
+ -- first, if we're putting references at the end of a section, we
+ -- put them here.
+ blkLevel <- asks envBlockLevel
+ refs <- if writerReferenceLocation opts == EndOfSection && blkLevel == 1
+ then notesAndRefs opts
+ else return empty
+
+ plain <- asks envPlain
+ -- we calculate the id that would be used by auto_identifiers
+ -- so we know whether to print an explicit identifier
+ ids <- gets stIds
+ let autoId = uniqueIdent inlines ids
+ modify $ \st -> st{ stIds = Set.insert autoId ids }
+ let attr' = case attr of
+ ("",[],[]) -> empty
+ (id',[],[]) | isEnabled Ext_auto_identifiers opts
+ && id' == autoId -> empty
+ (id',_,_) | isEnabled Ext_mmd_header_identifiers opts ->
+ space <> brackets (text id')
+ _ | isEnabled Ext_header_attributes opts ->
+ space <> attrsToMarkdown attr
+ | otherwise -> empty
+ contents <- inlineListToMarkdown opts $
+ if level == 1 && plain
+ then capitalize inlines
+ else inlines
+ let setext = writerSetextHeaders opts
+ hdr = nowrap $ case level of
+ 1 | plain -> blanklines 3 <> contents <> blanklines 2
+ | setext ->
+ contents <> attr' <> cr <> text (replicate (offset contents) '=') <>
+ blankline
+ 2 | plain -> blanklines 2 <> contents <> blankline
+ | setext ->
+ contents <> attr' <> cr <> text (replicate (offset contents) '-') <>
+ blankline
+ -- ghc interprets '#' characters in column 1 as linenum specifiers.
+ _ | plain || isEnabled Ext_literate_haskell opts ->
+ contents <> blankline
+ _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline
+
+ return $ refs <> hdr
+blockToMarkdown' opts (CodeBlock (_,classes,_) str)
+ | "haskell" `elem` classes && "literate" `elem` classes &&
+ isEnabled Ext_literate_haskell opts =
+ return $ prefixed "> " (text str) <> blankline
+blockToMarkdown' opts (CodeBlock attribs str) = return $
+ case attribs == nullAttr of
+ False | isEnabled Ext_backtick_code_blocks opts ->
+ backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline
+ | isEnabled Ext_fenced_code_blocks opts ->
+ tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline
+ _ -> nest (writerTabStop opts) (text str) <> blankline
+ where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of
+ [] -> "~~~~"
+ xs -> case maximum $ map length xs of
+ n | n < 3 -> "~~~~"
+ | otherwise -> replicate (n+1) '~'
+ backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of
+ [] -> "```"
+ xs -> case maximum $ map length xs of
+ n | n < 3 -> "```"
+ | otherwise -> replicate (n+1) '`'
+ attrs = if isEnabled Ext_fenced_code_attributes opts
+ then nowrap $ " " <> attrsToMarkdown attribs
+ else case attribs of
+ (_,(cls:_),_) -> " " <> text cls
+ _ -> empty
+blockToMarkdown' opts (BlockQuote blocks) = do
+ plain <- asks envPlain
+ -- if we're writing literate haskell, put a space before the bird tracks
+ -- so they won't be interpreted as lhs...
+ let leader = if isEnabled Ext_literate_haskell opts
+ then " > "
+ else if plain then " " else "> "
+ contents <- blockListToMarkdown opts blocks
+ return $ (prefixed leader contents) <> blankline
+blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
+ caption' <- inlineListToMarkdown opts caption
+ let caption'' = if null caption || not (isEnabled Ext_table_captions opts)
+ then empty
+ else blankline <> ": " <> caption' <> blankline
+ rawHeaders <- mapM (blockListToMarkdown opts) headers
+ rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
+ let isLineBreak LineBreak = Any True
+ isLineBreak _ = Any False
+ let isSimple = all (==0) widths &&
+ not ( getAny (query isLineBreak (headers:rows)) )
+ let isPlainBlock (Plain _) = True
+ isPlainBlock _ = False
+ let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows)
+ (nst,tbl) <- case True of
+ _ | isSimple &&
+ isEnabled Ext_simple_tables opts -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | isSimple &&
+ isEnabled Ext_pipe_tables opts -> fmap (id,) $
+ pipeTable (all null headers) aligns rawHeaders rawRows
+ | not hasBlocks &&
+ isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $
+ pandocTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | isEnabled Ext_grid_tables opts -> fmap (id,) $
+ gridTable opts (all null headers) aligns widths
+ rawHeaders rawRows
+ | isEnabled Ext_raw_html opts -> fmap (id,) $
+ text <$>
+ (writeHtml5String def $ Pandoc nullMeta [t])
+ | otherwise -> return $ (id, text "[TABLE]")
+ return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
+blockToMarkdown' opts (BulletList items) = do
+ contents <- inList $ mapM (bulletListItemToMarkdown opts) items
+ return $ cat contents <> blankline
+blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do
+ let start' = if isEnabled Ext_startnum opts then start else 1
+ let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle
+ let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim
+ let attribs = (start', sty', delim')
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- inList $
+ mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip markers' items
+ return $ cat contents <> blankline
+blockToMarkdown' opts (DefinitionList items) = do
+ contents <- inList $ mapM (definitionListItemToMarkdown opts) items
+ return $ cat contents <> blankline
+
+inList :: Monad m => MD m a -> MD m a
+inList p = local (\env -> env {envInList = True}) p
+
+addMarkdownAttribute :: String -> String
+addMarkdownAttribute s =
+ case span isTagText $ reverse $ parseTags s of
+ (xs,(TagOpen t attrs:rest)) ->
+ renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs)
+ where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs,
+ x /= "markdown"]
+ _ -> s
+
+pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc
+pipeTable headless aligns rawHeaders rawRows = do
+ let sp = text " "
+ let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
+ blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty
+ blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty
+ blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
+ let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
+ let torow cs = nowrap $ text "|" <>
+ hcat (intersperse (text "|") $
+ zipWith3 blockFor aligns widths (map chomp cs))
+ <> text "|"
+ let toborder (a, w) = text $ case a of
+ AlignLeft -> ':':replicate (w + 1) '-'
+ AlignCenter -> ':':replicate w '-' ++ ":"
+ AlignRight -> replicate (w + 1) '-' ++ ":"
+ AlignDefault -> replicate (w + 2) '-'
+ -- note: pipe tables can't completely lack a
+ -- header; for a headerless table, we need a header of empty cells.
+ -- see jgm/pandoc#1996.
+ let header = if headless
+ then torow (replicate (length aligns) empty)
+ else torow rawHeaders
+ let border = nowrap $ text "|" <> hcat (intersperse (text "|") $
+ map toborder $ zip aligns widths) <> text "|"
+ let body = vcat $ map torow rawRows
+ return $ header $$ border $$ body
+
+pandocTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> MD m Doc
+pandocTable opts headless aligns widths rawHeaders rawRows = do
+ let isSimple = all (==0) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> lblock
+ AlignCenter -> cblock
+ AlignRight -> rblock
+ AlignDefault -> lblock
+ -- Number of characters per column necessary to output every cell
+ -- without requiring a line break.
+ -- The @+2@ is needed for specifying the alignment.
+ let numChars = (+ 2) . maximum . map offset
+ -- Number of characters per column necessary to output every cell
+ -- without requiring a line break *inside a word*.
+ -- The @+2@ is needed for specifying the alignment.
+ let minNumChars = (+ 2) . maximum . map minOffset
+ let columns = transpose (rawHeaders : rawRows)
+ -- minimal column width without wrapping a single word
+ let noWordWrapWidth
+ | writerWrapText opts == WrapAuto
+ = fromIntegral $ maximum (map minNumChars columns)
+ | otherwise = fromIntegral $ maximum (map numChars columns)
+ let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w)
+ (noWordWrapWidth * w / minimum widths)
+ let widthsInChars
+ | isSimple = map numChars columns
+ | otherwise = map relWidth widths
+ let makeRow = hcat . intersperse (lblock 1 (text " ")) .
+ (zipWith3 alignHeader aligns widthsInChars)
+ let rows' = map makeRow rawRows
+ let head' = makeRow rawHeaders
+ let maxRowHeight = maximum $ map height (head':rows')
+ let underline = cat $ intersperse (text " ") $
+ map (\width -> text (replicate width '-')) widthsInChars
+ let border = if maxRowHeight > 1
+ then text (replicate (sum widthsInChars +
+ length widthsInChars - 1) '-')
+ else if headless
+ then underline
+ else empty
+ let head'' = if headless
+ then empty
+ else border <> cr <> head'
+ let body = if maxRowHeight > 1
+ then vsep rows'
+ else vcat rows'
+ let bottom = if headless
+ then underline
+ else border
+ return $ head'' $$ underline $$ body $$ bottom
+
+gridTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double]
+ -> [Doc] -> [[Doc]] -> MD m Doc
+gridTable opts headless aligns widths headers' rawRows = do
+ let numcols = length headers'
+ let widths' = if all (==0) widths
+ then replicate numcols (1.0 / fromIntegral numcols)
+ else widths
+ let widthsInChars = map
+ ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths'
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (1 : map height blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = chomp $ hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow headers'
+ let rows' = map (makeRow . map chomp) rawRows
+ let borderpart ch align widthInChars =
+ let widthInChars' = if widthInChars < 1 then 1 else widthInChars
+ in (if (align == AlignLeft || align == AlignCenter)
+ then char ':'
+ else char ch) <>
+ text (replicate widthInChars' ch) <>
+ (if (align == AlignRight || align == AlignCenter)
+ then char ':'
+ else char ch)
+ let border ch aligns' widthsInChars' =
+ char '+' <>
+ hcat (intersperse (char '+') (zipWith (borderpart ch)
+ aligns' widthsInChars')) <> char '+'
+ let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars)
+ rows'
+ let head'' = if headless
+ then empty
+ else head' $$ border '=' aligns widthsInChars
+ if headless
+ then return $
+ border '-' aligns widthsInChars $$
+ body $$
+ border '-' (repeat AlignDefault) widthsInChars
+ else return $
+ border '-' (repeat AlignDefault) widthsInChars $$
+ head'' $$
+ body $$
+ border '-' (repeat AlignDefault) widthsInChars
+
+itemEndsWithTightList :: [Block] -> Bool
+itemEndsWithTightList bs =
+ case bs of
+ [Plain _, BulletList xs] -> isTightList xs
+ [Plain _, OrderedList _ xs] -> isTightList xs
+ _ -> False
+
+-- | Convert bullet list item (list of blocks) to markdown.
+bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc
+bulletListItemToMarkdown opts bs = do
+ contents <- blockListToMarkdown opts bs
+ let sps = replicate (writerTabStop opts - 2) ' '
+ let start = text ('-' : ' ' : sps)
+ -- remove trailing blank line if item ends with a tight list
+ let contents' = if itemEndsWithTightList bs
+ then chomp contents <> cr
+ else contents
+ return $ hang (writerTabStop opts) start $ contents' <> cr
+
+-- | Convert ordered list item (a list of blocks) to markdown.
+orderedListItemToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> MD m Doc
+orderedListItemToMarkdown opts marker bs = do
+ contents <- blockListToMarkdown opts bs
+ let sps = case length marker - writerTabStop opts of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ let start = text marker <> sps
+ -- remove trailing blank line if item ends with a tight list
+ let contents' = if itemEndsWithTightList bs
+ then chomp contents <> cr
+ else contents
+ return $ hang (writerTabStop opts) start $ contents' <> cr
+
+-- | Convert definition list item (label, list of blocks) to markdown.
+definitionListItemToMarkdown :: PandocMonad m
+ => WriterOptions
+ -> ([Inline],[[Block]])
+ -> MD m Doc
+definitionListItemToMarkdown opts (label, defs) = do
+ labelText <- inlineListToMarkdown opts label
+ defs' <- mapM (mapM (blockToMarkdown opts)) defs
+ if isEnabled Ext_definition_lists opts
+ then do
+ let tabStop = writerTabStop opts
+ isPlain <- asks envPlain
+ let leader = if isPlain then " " else ": "
+ let sps = case writerTabStop opts - 3 of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ if isEnabled Ext_compact_definition_lists opts
+ then do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ return $ nowrap labelText <> cr <> contents <> cr
+ else do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ let isTight = case defs of
+ ((Plain _ : _): _) -> True
+ _ -> False
+ return $ blankline <> nowrap labelText <>
+ (if isTight then cr else blankline) <> contents <> blankline
+ else do
+ return $ nowrap labelText <> text " " <> cr <>
+ vsep (map vsep defs') <> blankline
+
+-- | Convert list of Pandoc block elements to markdown.
+blockListToMarkdown :: PandocMonad m
+ => WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> MD m Doc
+blockListToMarkdown opts blocks =
+ mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
+ -- insert comment between list and indented code block, or the
+ -- code block will be treated as a list continuation paragraph
+ where fixBlocks (b : CodeBlock attr x : rest)
+ | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
+ && isListBlock b = b : commentSep : CodeBlock attr x :
+ fixBlocks rest
+ fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (x : xs) = x : fixBlocks xs
+ fixBlocks [] = []
+ isListBlock (BulletList _) = True
+ isListBlock (OrderedList _ _) = True
+ isListBlock (DefinitionList _) = True
+ isListBlock _ = False
+ commentSep = if isEnabled Ext_raw_html opts
+ then RawBlock "html" "<!-- -->\n"
+ else RawBlock "markdown" "&nbsp;"
+
+-- | Get reference for target; if none exists, create unique one and return.
+-- Prefer label if possible; otherwise, generate a unique key.
+getReference :: PandocMonad m => Attr -> [Inline] -> Target -> MD m [Inline]
+getReference attr label target = do
+ st <- get
+ case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of
+ Just (ref, _, _) -> return ref
+ Nothing -> do
+ label' <- case find (\(l,_,_) -> l == label) (stRefs st) of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> notElem [Str (show n)]
+ (map (\(l,_,_) -> l) (stRefs st)))
+ [1..(10000 :: Integer)] of
+ Just x -> return [Str (show x)]
+ Nothing -> throwError $ PandocSomeError "no unique label"
+ Nothing -> return label
+ modify (\s -> s{ stRefs = (label', target, attr) : stRefs st })
+ return label'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc
+inlineListToMarkdown opts lst = do
+ inlist <- asks envInList
+ go (if inlist then avoidBadWrapsInList lst else lst)
+ where go [] = return empty
+ go (i:is) = case i of
+ (Link _ _ _) -> case is of
+ -- If a link is followed by another link or '[' we don't shortcut
+ (Link _ _ _):_ -> unshortcutable
+ Space:(Link _ _ _):_ -> unshortcutable
+ Space:(Str('[':_)):_ -> unshortcutable
+ Space:(RawInline _ ('[':_)):_ -> unshortcutable
+ Space:(Cite _ _):_ -> unshortcutable
+ SoftBreak:(Link _ _ _):_ -> unshortcutable
+ SoftBreak:(Str('[':_)):_ -> unshortcutable
+ SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable
+ SoftBreak:(Cite _ _):_ -> unshortcutable
+ (Cite _ _):_ -> unshortcutable
+ Str ('[':_):_ -> unshortcutable
+ (RawInline _ ('[':_)):_ -> unshortcutable
+ (RawInline _ (' ':'[':_)):_ -> unshortcutable
+ _ -> shortcutable
+ _ -> shortcutable
+ where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
+ unshortcutable = do
+ iMark <- local
+ (\env -> env { envRefShortcutable = False })
+ (inlineToMarkdown opts i)
+ fmap (iMark <>) (go is)
+
+isSp :: Inline -> Bool
+isSp Space = True
+isSp SoftBreak = True
+isSp _ = False
+
+avoidBadWrapsInList :: [Inline] -> [Inline]
+avoidBadWrapsInList [] = []
+avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s =
+ Str (' ':'>':cs) : avoidBadWrapsInList xs
+avoidBadWrapsInList (s:Str [c]:[])
+ | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : []
+avoidBadWrapsInList (s:Str [c]:Space:xs)
+ | isSp s && c `elem` ['-','*','+'] =
+ Str [' ', c] : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (s:Str cs:Space:xs)
+ | isSp s && isOrderedListMarker cs =
+ Str (' ':cs) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (s:Str cs:[])
+ | isSp s && isOrderedListMarker cs = Str (' ':cs) : []
+avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
+
+isOrderedListMarker :: String -> Bool
+isOrderedListMarker xs = (last xs `elem` ['.',')']) &&
+ isRight (runParser (anyOrderedListMarker >> eof)
+ defaultParserState "" xs)
+
+isRight :: Either a b -> Bool
+isRight (Right _) = True
+isRight (Left _) = False
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc
+inlineToMarkdown opts (Span attrs ils) = do
+ plain <- asks envPlain
+ contents <- inlineListToMarkdown opts ils
+ return $ case plain of
+ True -> contents
+ False | isEnabled Ext_bracketed_spans opts ->
+ "[" <> contents <> "]" <>
+ if attrs == nullAttr
+ then "{}"
+ else linkAttributes opts attrs
+ | isEnabled Ext_raw_html opts ||
+ isEnabled Ext_native_spans opts ->
+ tagWithAttrs "span" attrs <> contents <> text "</span>"
+ | otherwise -> contents
+inlineToMarkdown opts (Emph lst) = do
+ plain <- asks envPlain
+ contents <- inlineListToMarkdown opts lst
+ return $ if plain
+ then "_" <> contents <> "_"
+ else "*" <> contents <> "*"
+inlineToMarkdown opts (Strong lst) = do
+ plain <- asks envPlain
+ if plain
+ then inlineListToMarkdown opts $ capitalize lst
+ else do
+ contents <- inlineListToMarkdown opts lst
+ return $ "**" <> contents <> "**"
+inlineToMarkdown opts (Strikeout lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ if isEnabled Ext_strikeout opts
+ then "~~" <> contents <> "~~"
+ else if isEnabled Ext_raw_html opts
+ then "<s>" <> contents <> "</s>"
+ else contents
+inlineToMarkdown opts (Superscript lst) =
+ local (\env -> env {envEscapeSpaces = True}) $ do
+ contents <- inlineListToMarkdown opts lst
+ return $ if isEnabled Ext_superscript opts
+ then "^" <> contents <> "^"
+ else if isEnabled Ext_raw_html opts
+ then "<sup>" <> contents <> "</sup>"
+ else case (render Nothing contents) of
+ ds | all (\d -> d >= '0' && d <= '9') ds
+ -> text (map toSuperscript ds)
+ _ -> contents
+ where toSuperscript '1' = '\x00B9'
+ toSuperscript '2' = '\x00B2'
+ toSuperscript '3' = '\x00B3'
+ toSuperscript c = chr (0x2070 + (ord c - 48))
+inlineToMarkdown opts (Subscript lst) =
+ local (\env -> env {envEscapeSpaces = True}) $ do
+ contents <- inlineListToMarkdown opts lst
+ return $ if isEnabled Ext_subscript opts
+ then "~" <> contents <> "~"
+ else if isEnabled Ext_raw_html opts
+ then "<sub>" <> contents <> "</sub>"
+ else case (render Nothing contents) of
+ ds | all (\d -> d >= '0' && d <= '9') ds
+ -> text (map toSubscript ds)
+ _ -> contents
+ where toSubscript c = chr (0x2080 + (ord c - 48))
+inlineToMarkdown opts (SmallCaps lst) = do
+ plain <- asks envPlain
+ if not plain &&
+ (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
+ then do
+ contents <- inlineListToMarkdown opts lst
+ return $ tagWithAttrs "span"
+ ("",[],[("style","font-variant:small-caps;")])
+ <> contents <> text "</span>"
+ else inlineListToMarkdown opts $ capitalize lst
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ if isEnabled Ext_smart opts
+ then "'" <> contents <> "'"
+ else "‘" <> contents <> "’"
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ if isEnabled Ext_smart opts
+ then "\"" <> contents <> "\""
+ else "“" <> contents <> "”"
+inlineToMarkdown opts (Code attr str) = do
+ let tickGroups = filter (\s -> '`' `elem` s) $ group str
+ let longest = if null tickGroups
+ then 0
+ else maximum $ map length tickGroups
+ let marker = replicate (longest + 1) '`'
+ let spacer = if (longest == 0) then "" else " "
+ let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
+ then attrsToMarkdown attr
+ else empty
+ plain <- asks envPlain
+ if plain
+ then return $ text str
+ else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
+inlineToMarkdown opts (Str str) = do
+ isPlain <- asks envPlain
+ let str' = (if isEnabled Ext_smart opts
+ then unsmartify opts
+ else id) $
+ if isPlain
+ then str
+ else escapeString opts str
+ return $ text str'
+inlineToMarkdown opts (Math InlineMath str) =
+ case writerHTMLMathMethod opts of
+ WebTeX url ->
+ inlineToMarkdown opts (Image nullAttr [Str str]
+ (url ++ urlEncode str, str))
+ _ | isEnabled Ext_tex_math_dollars opts ->
+ return $ "$" <> text str <> "$"
+ | isEnabled Ext_tex_math_single_backslash opts ->
+ return $ "\\(" <> text str <> "\\)"
+ | isEnabled Ext_tex_math_double_backslash opts ->
+ return $ "\\\\(" <> text str <> "\\\\)"
+ | otherwise -> do
+ plain <- asks envPlain
+ texMathToInlines InlineMath str >>=
+ inlineListToMarkdown opts .
+ (if plain then makeMathPlainer else id)
+inlineToMarkdown opts (Math DisplayMath str) =
+ case writerHTMLMathMethod opts of
+ WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
+ inlineToMarkdown opts (Image nullAttr [Str str]
+ (url ++ urlEncode str, str))
+ _ | isEnabled Ext_tex_math_dollars opts ->
+ return $ "$$" <> text str <> "$$"
+ | isEnabled Ext_tex_math_single_backslash opts ->
+ return $ "\\[" <> text str <> "\\]"
+ | isEnabled Ext_tex_math_double_backslash opts ->
+ return $ "\\\\[" <> text str <> "\\\\]"
+ | otherwise -> (\x -> cr <> x <> cr) `fmap`
+ (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
+inlineToMarkdown opts il@(RawInline f str) = do
+ plain <- asks envPlain
+ if not plain &&
+ ( f == "markdown" ||
+ (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) ||
+ (isEnabled Ext_raw_html opts && f == "html") )
+ then return $ text str
+ else do
+ report $ InlineNotRendered il
+ return empty
+inlineToMarkdown opts (LineBreak) = do
+ plain <- asks envPlain
+ if plain || isEnabled Ext_hard_line_breaks opts
+ then return cr
+ else return $
+ if isEnabled Ext_escaped_line_breaks opts
+ then "\\" <> cr
+ else " " <> cr
+inlineToMarkdown _ Space = do
+ escapeSpaces <- asks envEscapeSpaces
+ return $ if escapeSpaces then "\\ " else space
+inlineToMarkdown opts SoftBreak = do
+ escapeSpaces <- asks envEscapeSpaces
+ let space' = if escapeSpaces then "\\ " else space
+ return $ case writerWrapText opts of
+ WrapNone -> space'
+ WrapAuto -> space'
+ WrapPreserve -> cr
+inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (Cite (c:cs) lst)
+ | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
+ | otherwise =
+ if citationMode c == AuthorInText
+ then do
+ suffs <- inlineListToMarkdown opts $ citationSuffix c
+ rest <- mapM convertOne cs
+ let inbr = suffs <+> joincits rest
+ br = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
+ return $ text ("@" ++ citationId c) <+> br
+ else do
+ cits <- mapM convertOne (c:cs)
+ return $ text "[" <> joincits cits <> text "]"
+ where
+ joincits = hcat . intersperse (text "; ") . filter (not . isEmpty)
+ convertOne Citation { citationId = k
+ , citationPrefix = pinlines
+ , citationSuffix = sinlines
+ , citationMode = m }
+ = do
+ pdoc <- inlineListToMarkdown opts pinlines
+ sdoc <- inlineListToMarkdown opts sinlines
+ let k' = text (modekey m ++ "@" ++ k)
+ r = case sinlines of
+ Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
+ _ -> k' <+> sdoc
+ return $ pdoc <+> r
+ modekey SuppressAuthor = "-"
+ modekey _ = ""
+inlineToMarkdown opts lnk@(Link attr txt (src, tit))
+ | isEnabled Ext_raw_html opts &&
+ not (isEnabled Ext_link_attributes opts) &&
+ attr /= nullAttr = -- use raw HTML
+ (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
+ | otherwise = do
+ plain <- asks envPlain
+ linktext <- inlineListToMarkdown opts txt
+ let linktitle = if null tit
+ then empty
+ else text $ " \"" ++ tit ++ "\""
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
+ let useAuto = isURI src &&
+ case txt of
+ [Str s] | escapeURI s == srcSuffix -> True
+ _ -> False
+ let useRefLinks = writerReferenceLinks opts && not useAuto
+ shortcutable <- asks envRefShortcutable
+ let useShortcutRefLinks = shortcutable &&
+ isEnabled Ext_shortcut_reference_links opts
+ ref <- if useRefLinks then getReference attr txt (src, tit) else return []
+ reftext <- inlineListToMarkdown opts ref
+ return $ if useAuto
+ then if plain
+ then text srcSuffix
+ else "<" <> text srcSuffix <> ">"
+ else if useRefLinks
+ then let first = "[" <> linktext <> "]"
+ second = if txt == ref
+ then if useShortcutRefLinks
+ then ""
+ else "[]"
+ else "[" <> reftext <> "]"
+ in first <> second
+ else if plain
+ then linktext
+ else "[" <> linktext <> "](" <>
+ text src <> linktitle <> ")" <>
+ linkAttributes opts attr
+inlineToMarkdown opts img@(Image attr alternate (source, tit))
+ | isEnabled Ext_raw_html opts &&
+ not (isEnabled Ext_link_attributes opts) &&
+ attr /= nullAttr = -- use raw HTML
+ (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]])
+ | otherwise = do
+ plain <- asks envPlain
+ let txt = if null alternate || alternate == [Str source]
+ -- to prevent autolinks
+ then [Str ""]
+ else alternate
+ linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
+ return $ if plain
+ then "[" <> linkPart <> "]"
+ else "!" <> linkPart
+inlineToMarkdown opts (Note contents) = do
+ modify (\st -> st{ stNotes = contents : stNotes st })
+ st <- get
+ let ref = text $ writerIdentifierPrefix opts ++ show (stNoteNum st + (length $ stNotes st) - 1)
+ if isEnabled Ext_footnotes opts
+ then return $ "[^" <> ref <> "]"
+ else return $ "[" <> ref <> "]"
+
+makeMathPlainer :: [Inline] -> [Inline]
+makeMathPlainer = walk go
+ where
+ go (Emph xs) = Span nullAttr xs
+ go x = x
+
diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs
new file mode 100644
index 000000000..b7419ddf9
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Math.hs
@@ -0,0 +1,49 @@
+module Text.Pandoc.Writers.Math
+ ( texMathToInlines
+ , convertMath
+ )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.TeXMath (Exp, writePandoc, DisplayType(..), readTeX)
+
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
+-- can't be converted.
+texMathToInlines :: PandocMonad m
+ => MathType
+ -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> m [Inline]
+texMathToInlines mt inp = do
+ res <- convertMath writePandoc mt inp
+ case res of
+ Right (Just ils) -> return ils
+ Right (Nothing) -> do
+ report $ CouldNotConvertTeXMath inp ""
+ return [mkFallback mt inp]
+ Left il -> return [il]
+
+mkFallback :: MathType -> String -> Inline
+mkFallback mt str = Str (delim ++ str ++ delim)
+ where delim = case mt of
+ DisplayMath -> "$$"
+ InlineMath -> "$"
+
+-- | Converts a raw TeX math formula using a writer function,
+-- issuing a warning and producing a fallback (a raw string)
+-- on failure.
+convertMath :: PandocMonad m
+ => (DisplayType -> [Exp] -> a) -> MathType -> String
+ -> m (Either Inline a)
+convertMath writer mt str = do
+ case writer dt <$> readTeX str of
+ Right r -> return (Right r)
+ Left e -> do
+ report $ CouldNotConvertTeXMath str e
+ return (Left $ mkFallback mt str)
+ where dt = case mt of
+ DisplayMath -> DisplayBlock
+ InlineMath -> DisplayInline
+
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
new file mode 100644
index 000000000..dc6206e6c
--- /dev/null
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -0,0 +1,442 @@
+{-
+Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.MediaWiki
+ Copyright : Copyright (C) 2008-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to MediaWiki markup.
+
+MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
+-}
+module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Pretty (render)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.XML ( escapeStringForXML )
+import Data.List ( intersect, intercalate )
+import Network.URI ( isURI )
+import Control.Monad.Reader
+import Control.Monad.State
+import Text.Pandoc.Class (PandocMonad)
+
+data WriterState = WriterState {
+ stNotes :: Bool -- True if there are notes
+ , stOptions :: WriterOptions -- writer options
+ }
+
+data WriterReader = WriterReader {
+ options :: WriterOptions -- Writer options
+ , listLevel :: String -- String at beginning of list items, e.g. "**"
+ , useTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+type MediaWikiWriter = ReaderT WriterReader (State WriterState)
+
+-- | Convert Pandoc to MediaWiki.
+writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeMediaWiki opts document = return $
+ let initialState = WriterState { stNotes = False, stOptions = opts }
+ env = WriterReader { options = opts, listLevel = [], useTags = False }
+ in evalState (runReaderT (pandocToMediaWiki document) env) initialState
+
+-- | Return MediaWiki representation of document.
+pandocToMediaWiki :: Pandoc -> MediaWikiWriter String
+pandocToMediaWiki (Pandoc meta blocks) = do
+ opts <- asks options
+ metadata <- metaToJSON opts
+ (fmap trimr . blockListToMediaWiki)
+ inlineListToMediaWiki
+ meta
+ body <- blockListToMediaWiki blocks
+ notesExist <- gets stNotes
+ let notes = if notesExist
+ then "\n<references />"
+ else ""
+ let main = body ++ notes
+ let context = defField "body" main
+ $ defField "toc" (writerTableOfContents opts) metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Escape special characters for MediaWiki.
+escapeString :: String -> String
+escapeString = escapeStringForXML
+
+-- | Convert Pandoc block element to MediaWiki.
+blockToMediaWiki :: Block -- ^ Block element
+ -> MediaWikiWriter String
+
+blockToMediaWiki Null = return ""
+
+blockToMediaWiki (Div attrs bs) = do
+ contents <- blockListToMediaWiki bs
+ return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++
+ contents ++ "\n\n" ++ "</div>"
+
+blockToMediaWiki (Plain inlines) =
+ inlineListToMediaWiki inlines
+
+-- title beginning with fig: indicates that the image is a figure
+blockToMediaWiki (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return ""
+ else ("|caption " ++) `fmap` inlineListToMediaWiki txt
+ img <- imageToMediaWiki attr
+ let opt = if null txt
+ then ""
+ else "|alt=" ++ if null tit then capt else tit ++ capt
+ return $ "[[File:" ++ src ++ "|frame|none" ++ img ++ opt ++ "]]\n"
+
+blockToMediaWiki (Para inlines) = do
+ tags <- asks useTags
+ lev <- asks listLevel
+ contents <- inlineListToMediaWiki inlines
+ return $ if tags
+ then "<p>" ++ contents ++ "</p>"
+ else contents ++ if null lev then "\n" else ""
+
+blockToMediaWiki (LineBlock lns) =
+ blockToMediaWiki $ linesToPara lns
+
+blockToMediaWiki (RawBlock f str)
+ | f == Format "mediawiki" = return str
+ | f == Format "html" = return str
+ | otherwise = return ""
+
+blockToMediaWiki HorizontalRule = return "\n-----\n"
+
+blockToMediaWiki (Header level _ inlines) = do
+ contents <- inlineListToMediaWiki inlines
+ let eqs = replicate level '='
+ return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
+
+blockToMediaWiki (CodeBlock (_,classes,_) str) = do
+ let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
+ "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
+ "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
+ "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
+ "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
+ "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
+ "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
+ "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
+ "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
+ return $
+ if null at
+ then "<pre" ++ (if null classes
+ then ">"
+ else " class=\"" ++ unwords classes ++ "\">") ++
+ escapeString str ++ "</pre>"
+ else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>"
+ -- note: no escape!
+
+blockToMediaWiki (BlockQuote blocks) = do
+ contents <- blockListToMediaWiki blocks
+ return $ "<blockquote>" ++ contents ++ "</blockquote>"
+
+blockToMediaWiki (Table capt aligns widths headers rows') = do
+ caption <- if null capt
+ then return ""
+ else do
+ c <- inlineListToMediaWiki capt
+ return $ "|+ " ++ trimr c ++ "\n"
+ let headless = all null headers
+ let allrows = if headless then rows' else headers:rows'
+ tableBody <- intercalate "|-\n" `fmap`
+ mapM (tableRowToMediaWiki headless aligns widths)
+ (zip [1..] allrows)
+ return $ "{|\n" ++ caption ++ tableBody ++ "|}\n"
+
+blockToMediaWiki x@(BulletList items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
+ then do
+ contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items
+ return $ "<ul>\n" ++ vcat contents ++ "</ul>\n"
+ else do
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
+
+blockToMediaWiki x@(OrderedList attribs items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
+ then do
+ contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items
+ return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n"
+ else do
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
+
+blockToMediaWiki x@(DefinitionList items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
+ then do
+ contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items
+ return $ "<dl>\n" ++ vcat contents ++ "</dl>\n"
+ else do
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet or ordered list item (list of blocks) to MediaWiki.
+listItemToMediaWiki :: [Block] -> MediaWikiWriter String
+listItemToMediaWiki items = do
+ contents <- blockListToMediaWiki items
+ tags <- asks useTags
+ if tags
+ then return $ "<li>" ++ contents ++ "</li>"
+ else do
+ marker <- asks listLevel
+ return $ marker ++ " " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to MediaWiki.
+definitionListItemToMediaWiki :: ([Inline],[[Block]])
+ -> MediaWikiWriter String
+definitionListItemToMediaWiki (label, items) = do
+ labelText <- inlineListToMediaWiki label
+ contents <- mapM blockListToMediaWiki items
+ tags <- asks useTags
+ if tags
+ then return $ "<dt>" ++ labelText ++ "</dt>\n" ++
+ intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents)
+ else do
+ marker <- asks listLevel
+ return $ marker ++ " " ++ labelText ++ "\n" ++
+ intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents)
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (num, sty, _) items -> all isSimpleListItem items &&
+ num == 1 && sty `elem` [DefaultStyle, Decimal]
+ DefinitionList items -> all isSimpleListItem $ concatMap snd items
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+-- Auxiliary functions for tables:
+
+tableRowToMediaWiki :: Bool
+ -> [Alignment]
+ -> [Double]
+ -> (Int, [[Block]])
+ -> MediaWikiWriter String
+tableRowToMediaWiki headless alignments widths (rownum, cells) = do
+ cells' <- mapM (tableCellToMediaWiki headless rownum)
+ $ zip3 alignments widths cells
+ return $ unlines cells'
+
+tableCellToMediaWiki :: Bool
+ -> Int
+ -> (Alignment, Double, [Block])
+ -> MediaWikiWriter String
+tableCellToMediaWiki headless rownum (alignment, width, bs) = do
+ contents <- blockListToMediaWiki bs
+ let marker = if rownum == 1 && not headless then "!" else "|"
+ let percent w = show (truncate (100*w) :: Integer) ++ "%"
+ let attrs = ["align=" ++ show (alignmentToString alignment) |
+ alignment /= AlignDefault && alignment /= AlignLeft] ++
+ ["width=\"" ++ percent width ++ "\"" |
+ width /= 0.0 && rownum == 1]
+ let attr = if null attrs
+ then ""
+ else unwords attrs ++ "|"
+ let sep = case bs of
+ [Plain _] -> " "
+ [Para _] -> " "
+ _ -> "\n"
+ return $ marker ++ attr ++ sep ++ trimr contents
+
+alignmentToString :: Alignment -> String
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+imageToMediaWiki :: Attr -> MediaWikiWriter String
+imageToMediaWiki attr = do
+ opts <- gets stOptions
+ let (_, cls, _) = attr
+ toPx = fmap (showInPixel opts) . checkPct
+ checkPct (Just (Percent _)) = Nothing
+ checkPct maybeDim = maybeDim
+ go (Just w) Nothing = '|':w ++ "px"
+ go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px"
+ go Nothing (Just h) = "|x" ++ h ++ "px"
+ go Nothing Nothing = ""
+ dims = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
+ classes = if null cls
+ then ""
+ else "|class=" ++ unwords cls
+ return $ dims ++ classes
+
+-- | Convert list of Pandoc block elements to MediaWiki.
+blockListToMediaWiki :: [Block] -- ^ List of block elements
+ -> MediaWikiWriter String
+blockListToMediaWiki blocks =
+ fmap vcat $ mapM blockToMediaWiki blocks
+
+-- | Convert list of Pandoc inline elements to MediaWiki.
+inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String
+inlineListToMediaWiki lst =
+ fmap concat $ mapM inlineToMediaWiki lst
+
+-- | Convert Pandoc inline element to MediaWiki.
+inlineToMediaWiki :: Inline -> MediaWikiWriter String
+
+inlineToMediaWiki (Span attrs ils) = do
+ contents <- inlineListToMediaWiki ils
+ return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>"
+
+inlineToMediaWiki (Emph lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "''" ++ contents ++ "''"
+
+inlineToMediaWiki (Strong lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "'''" ++ contents ++ "'''"
+
+inlineToMediaWiki (Strikeout lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "<s>" ++ contents ++ "</s>"
+
+inlineToMediaWiki (Superscript lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "<sup>" ++ contents ++ "</sup>"
+
+inlineToMediaWiki (Subscript lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "<sub>" ++ contents ++ "</sub>"
+
+inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst
+
+inlineToMediaWiki (Quoted SingleQuote lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "\8216" ++ contents ++ "\8217"
+
+inlineToMediaWiki (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMediaWiki lst
+ return $ "\8220" ++ contents ++ "\8221"
+
+inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst
+
+inlineToMediaWiki (Code _ str) =
+ return $ "<code>" ++ escapeString str ++ "</code>"
+
+inlineToMediaWiki (Str str) = return $ escapeString str
+
+inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>"
+ -- note: str should NOT be escaped
+
+inlineToMediaWiki (RawInline f str)
+ | f == Format "mediawiki" = return str
+ | f == Format "html" = return str
+ | otherwise = return ""
+
+inlineToMediaWiki LineBreak = return "<br />\n"
+
+inlineToMediaWiki SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ case wrapText of
+ WrapAuto -> return " "
+ WrapNone -> return " "
+ WrapPreserve -> return "\n"
+
+inlineToMediaWiki Space = return " "
+
+inlineToMediaWiki (Link _ txt (src, _)) = do
+ label <- inlineListToMediaWiki txt
+ case txt of
+ [Str s] | isURI src && escapeURI s == src -> return src
+ _ -> return $ if isURI src
+ then "[" ++ src ++ " " ++ label ++ "]"
+ else "[[" ++ src' ++ "|" ++ label ++ "]]"
+ where src' = case src of
+ '/':xs -> xs -- with leading / it's a
+ _ -> src -- link to a help page
+
+inlineToMediaWiki (Image attr alt (source, tit)) = do
+ img <- imageToMediaWiki attr
+ alt' <- inlineListToMediaWiki alt
+ let txt = if null tit
+ then if null alt
+ then ""
+ else '|' : alt'
+ else '|' : tit
+ return $ "[[File:" ++ source ++ img ++ txt ++ "]]"
+
+inlineToMediaWiki (Note contents) = do
+ contents' <- blockListToMediaWiki contents
+ modify (\s -> s { stNotes = True })
+ return $ "<ref>" ++ contents' ++ "</ref>"
+ -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
new file mode 100644
index 000000000..2421fd94d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -0,0 +1,79 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Native
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of a 'Pandoc' document to a string representation.
+-}
+module Text.Pandoc.Writers.Native ( writeNative )
+where
+import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
+import Data.List ( intersperse )
+import Text.Pandoc.Definition
+import Text.Pandoc.Pretty
+import Text.Pandoc.Class (PandocMonad)
+
+prettyList :: [Doc] -> Doc
+prettyList ds =
+ "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> Doc
+prettyBlock (LineBlock lines') =
+ "LineBlock" $$ prettyList (map (text . show) lines')
+prettyBlock (BlockQuote blocks) =
+ "BlockQuote" $$ prettyList (map prettyBlock blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList" <> space <> text (show attribs) $$
+ (prettyList $ map (prettyList . map prettyBlock) blockLists)
+prettyBlock (BulletList blockLists) =
+ "BulletList" $$
+ (prettyList $ map (prettyList . map prettyBlock) blockLists)
+prettyBlock (DefinitionList items) = "DefinitionList" $$
+ (prettyList $ map deflistitem items)
+ where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <>
+ nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <>
+ text (show widths) $$
+ prettyRow header $$
+ prettyList (map prettyRow rows)
+ where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols)
+prettyBlock (Div attr blocks) =
+ text ("Div " <> show attr) $$ prettyList (map prettyBlock blocks)
+prettyBlock block = text $ show block
+
+-- | Prettyprint Pandoc document.
+writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeNative opts (Pandoc meta blocks) = return $
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ withHead = case writerTemplate opts of
+ Just _ -> \bs -> text ("Pandoc (" ++ show meta ++ ")") $$
+ bs $$ cr
+ Nothing -> id
+ in render colwidth $ withHead $ prettyList $ map prettyBlock blocks
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
new file mode 100644
index 000000000..ee5fa4c24
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-
+Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ODT
+ Copyright : Copyright (C) 2008-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to ODT.
+-}
+module Text.Pandoc.Writers.ODT ( writeODT ) where
+import Data.List ( isPrefixOf )
+import Data.Maybe ( fromMaybe )
+import Text.XML.Light.Output
+import Text.TeXMath
+import qualified Data.ByteString.Lazy as B
+import Text.Pandoc.UTF8 ( fromStringLazy )
+import Codec.Archive.Zip
+import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) )
+import Text.Pandoc.Shared ( stringify )
+import Text.Pandoc.ImageSize
+import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
+import Text.Pandoc.Definition
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared ( fixDisplayMath )
+import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
+import Control.Monad.State
+import Control.Monad.Except (runExceptT)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.XML
+import Text.Pandoc.Pretty
+import System.FilePath ( takeExtension, takeDirectory, (<.>))
+import Text.Pandoc.Class ( PandocMonad, report )
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Logging
+
+data ODTState = ODTState { stEntries :: [Entry]
+ }
+
+type O m = StateT ODTState m
+
+-- | Produce an ODT file from a Pandoc document.
+writeODT :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> m B.ByteString
+writeODT opts doc =
+ let initState = ODTState{ stEntries = []
+ }
+ in
+ evalStateT (pandocToODT opts doc) initState
+
+-- | Produce an ODT file from a Pandoc document.
+pandocToODT :: PandocMonad m
+ => WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> O m B.ByteString
+pandocToODT opts doc@(Pandoc meta _) = do
+ let datadir = writerUserDataDir opts
+ let title = docTitle meta
+ refArchive <-
+ case writerReferenceDoc opts of
+ Just f -> liftM toArchive $ lift $ P.readFileLazy f
+ Nothing -> lift $ (toArchive . B.fromStrict) <$>
+ P.readDataFile datadir "reference.odt"
+ -- handle formulas and pictures
+ -- picEntriesRef <- P.newIORef ([] :: [Entry])
+ doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc
+ newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc'
+ epochtime <- floor `fmap` (lift P.getPOSIXTime)
+ let contentEntry = toEntry "content.xml" epochtime
+ $ fromStringLazy newContents
+ picEntries <- gets stEntries
+ let archive = foldr addEntryToArchive refArchive
+ $ contentEntry : picEntries
+ -- construct META-INF/manifest.xml based on archive
+ let toFileEntry fp = case getMimeType fp of
+ Nothing -> empty
+ Just m -> selfClosingTag "manifest:file-entry"
+ [("manifest:media-type", m)
+ ,("manifest:full-path", fp)
+ ,("manifest:version", "1.2")
+ ]
+ let files = [ ent | ent <- filesInArchive archive,
+ not ("META-INF" `isPrefixOf` ent) ]
+ let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive,
+ "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ]
+ let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
+ $ fromStringLazy $ render Nothing
+ $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+ $$
+ ( inTags True "manifest:manifest"
+ [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")
+ ,("manifest:version","1.2")]
+ $ ( selfClosingTag "manifest:file-entry"
+ [("manifest:media-type","application/vnd.oasis.opendocument.text")
+ ,("manifest:full-path","/")]
+ $$ vcat ( map toFileEntry $ files )
+ $$ vcat ( map toFileEntry $ formulas )
+ )
+ )
+ let archive' = addEntryToArchive manifestEntry archive
+ let metaEntry = toEntry "meta.xml" epochtime
+ $ fromStringLazy $ render Nothing
+ $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+ $$
+ ( inTags True "office:document-meta"
+ [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0")
+ ,("xmlns:xlink","http://www.w3.org/1999/xlink")
+ ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0")
+ ,("xmlns:ooo","http://openoffice.org/2004/office")
+ ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
+ ,("office:version","1.2")]
+ $ ( inTagsSimple "office:meta"
+ $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title))
+ )
+ )
+ )
+ -- make sure mimetype is first
+ let mimetypeEntry = toEntry "mimetype" epochtime
+ $ fromStringLazy "application/vnd.oasis.opendocument.text"
+ let archive'' = addEntryToArchive mimetypeEntry
+ $ addEntryToArchive metaEntry archive'
+ return $ fromArchive archive''
+
+-- | transform both Image and Math elements
+transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline
+transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = do
+ res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
+ case res of
+ Left (_ :: PandocError) -> do
+ report $ CouldNotFetchResource src ""
+ return $ Emph lab
+ Right (img, mbMimeType) -> do
+ (ptX, ptY) <- case imageSize img of
+ Right s -> return $ sizeInPoints s
+ Left msg -> do
+ report $ CouldNotDetermineImageSize src msg
+ return (100, 100)
+ let dims =
+ case (getDim Width, getDim Height) of
+ (Just w, Just h) -> [("width", show w), ("height", show h)]
+ (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")]
+ (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)]
+ (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")]
+ (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)]
+ _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")]
+ where
+ ratio = ptX / ptY
+ getDim dir = case (dimension dir attr) of
+ Just (Percent i) -> Just $ Percent i
+ Just dim -> Just $ Inch $ inInch opts dim
+ Nothing -> Nothing
+ let newattr = (id', cls, dims)
+ entries <- gets stEntries
+ let extension = fromMaybe (takeExtension $ takeWhile (/='?') src)
+ (mbMimeType >>= extensionFromMimeType)
+ let newsrc = "Pictures/" ++ show (length entries) <.> extension
+ let toLazy = B.fromChunks . (:[])
+ epochtime <- floor `fmap` (lift P.getPOSIXTime)
+ let entry = toEntry newsrc epochtime $ toLazy img
+ modify $ \st -> st{ stEntries = entry : entries }
+ return $ Image newattr lab (newsrc, t)
+transformPicMath _ (Math t math) = do
+ entries <- gets stEntries
+ let dt = if t == InlineMath then DisplayInline else DisplayBlock
+ case writeMathML dt <$> readTeX math of
+ Left _ -> return $ Math t math
+ Right r -> do
+ let conf = useShortEmptyTags (const False) defaultConfigPP
+ let mathml = ppcTopElement conf r
+ epochtime <- floor `fmap` (lift $ P.getPOSIXTime)
+ let dirname = "Formula-" ++ show (length entries) ++ "/"
+ let fname = dirname ++ "content.xml"
+ let entry = toEntry fname epochtime (fromStringLazy mathml)
+ modify $ \st -> st{ stEntries = entry : entries }
+ return $ RawInline (Format "opendocument") $ render Nothing $
+ inTags False "draw:frame" [("text:anchor-type",
+ if t == DisplayMath
+ then "paragraph"
+ else "as-char")
+ ,("style:vertical-pos", "middle")
+ ,("style:vertical-rel", "text")] $
+ selfClosingTag "draw:object" [("xlink:href", dirname)
+ , ("xlink:type", "simple")
+ , ("xlink:show", "embed")
+ , ("xlink:actuate", "onLoad")]
+
+transformPicMath _ x = return x
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
new file mode 100644
index 000000000..bc0cfc300
--- /dev/null
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -0,0 +1,103 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.OPML
+ Copyright : Copyright (C) 2013-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to OPML XML.
+-}
+module Text.Pandoc.Writers.OPML ( writeOPML) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Writers.Markdown (writeMarkdown)
+import Text.Pandoc.Pretty
+import Text.Pandoc.Compat.Time
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Error
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Class (PandocMonad)
+
+-- | Convert Pandoc document to string in OPML format.
+writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeOPML opts (Pandoc meta blocks) = do
+ let elements = hierarchicalize blocks
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta
+ metadata <- metaToJSON opts
+ (writeMarkdown def . Pandoc nullMeta)
+ (\ils -> trimr <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils]))
+ meta'
+ main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements)
+ let context = defField "body" main metadata
+ return $ case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+
+writeHtmlInlines :: PandocMonad m => [Inline] -> m String
+writeHtmlInlines ils =
+ trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils])
+
+-- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT
+showDateTimeRFC822 :: UTCTime -> String
+showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z"
+
+convertDate :: [Inline] -> String
+convertDate ils = maybe "" showDateTimeRFC822 $
+#if MIN_VERSION_time(1,5,0)
+ parseTimeM True
+#else
+ parseTime
+#endif
+ defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils)
+
+-- | Convert an Element to OPML.
+elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc
+elementToOPML _ (Blk _) = return empty
+elementToOPML opts (Sec _ _num _ title elements) = do
+ let isBlk :: Element -> Bool
+ isBlk (Blk _) = True
+ isBlk _ = False
+
+ fromBlk :: PandocMonad m => Element -> m Block
+ fromBlk (Blk x) = return x
+ fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block"
+
+ (blocks, rest) = span isBlk elements
+ htmlIls <- writeHtmlInlines title
+ md <- if null blocks
+ then return []
+ else do blks <- mapM fromBlk blocks
+ writeMarkdown def $ Pandoc nullMeta blks
+ let attrs = [("text", htmlIls)] ++ [("_note", md) | not (null blocks)]
+ o <- mapM (elementToOPML opts) rest
+ return $ inTags True "outline" attrs $ vcat o
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
new file mode 100644
index 000000000..851e18b8e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -0,0 +1,626 @@
+{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-}
+{-
+Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it>
+ and John MacFarlane.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.OpenDocument
+ Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to OpenDocument XML.
+-}
+module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.XML
+import Text.Pandoc.Shared (linesToPara)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Pretty
+import Text.Printf ( printf )
+import Control.Arrow ( (***), (>>>) )
+import Control.Monad.State hiding ( when )
+import Data.Char (chr)
+import qualified Data.Set as Set
+import qualified Data.Map as Map
+import Text.Pandoc.Writers.Shared
+import Data.List (sortBy)
+import Data.Ord (comparing)
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+--
+-- OpenDocument writer
+--
+
+type OD m = StateT WriterState m
+
+data WriterState =
+ WriterState { stNotes :: [Doc]
+ , stTableStyles :: [Doc]
+ , stParaStyles :: [Doc]
+ , stListStyles :: [(Int, [Doc])]
+ , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc)
+ , stTextStyleAttr :: Set.Set TextStyle
+ , stIndentPara :: Int
+ , stInDefinition :: Bool
+ , stTight :: Bool
+ , stFirstPara :: Bool
+ , stImageId :: Int
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState =
+ WriterState { stNotes = []
+ , stTableStyles = []
+ , stParaStyles = []
+ , stListStyles = []
+ , stTextStyles = Map.empty
+ , stTextStyleAttr = Set.empty
+ , stIndentPara = 0
+ , stInDefinition = False
+ , stTight = False
+ , stFirstPara = False
+ , stImageId = 1
+ }
+
+when :: Bool -> Doc -> Doc
+when p a = if p then a else empty
+
+addTableStyle :: PandocMonad m => Doc -> OD m ()
+addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s }
+
+addNote :: PandocMonad m => Doc -> OD m ()
+addNote i = modify $ \s -> s { stNotes = i : stNotes s }
+
+addParaStyle :: PandocMonad m => Doc -> OD m ()
+addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s }
+
+addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m ()
+addTextStyle attrs i = modify $ \s ->
+ s { stTextStyles = Map.insert attrs i (stTextStyles s) }
+
+addTextStyleAttr :: PandocMonad m => TextStyle -> OD m ()
+addTextStyleAttr t = modify $ \s ->
+ s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) }
+
+increaseIndent :: PandocMonad m => OD m ()
+increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s }
+
+resetIndent :: PandocMonad m => OD m ()
+resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 }
+
+inTightList :: PandocMonad m => OD m a -> OD m a
+inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r ->
+ modify (\s -> s { stTight = False }) >> return r
+
+setInDefinitionList :: PandocMonad m => Bool -> OD m ()
+setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
+
+setFirstPara :: PandocMonad m => OD m ()
+setFirstPara = modify $ \s -> s { stFirstPara = True }
+
+inParagraphTags :: PandocMonad m => Doc -> OD m Doc
+inParagraphTags d | isEmpty d = return empty
+inParagraphTags d = do
+ b <- gets stFirstPara
+ a <- if b
+ then do modify $ \st -> st { stFirstPara = False }
+ return $ [("text:style-name", "First_20_paragraph")]
+ else return [("text:style-name", "Text_20_body")]
+ return $ inTags False "text:p" a d
+
+inParagraphTagsWithStyle :: String -> Doc -> Doc
+inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
+
+inSpanTags :: String -> Doc -> Doc
+inSpanTags s = inTags False "text:span" [("text:style-name",s)]
+
+withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a
+withTextStyle s f = do
+ oldTextStyleAttr <- gets stTextStyleAttr
+ addTextStyleAttr s
+ res <- f
+ modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr }
+ return res
+
+inTextStyle :: PandocMonad m => Doc -> OD m Doc
+inTextStyle d = do
+ at <- gets stTextStyleAttr
+ if Set.null at
+ then return d
+ else do
+ styles <- gets stTextStyles
+ case Map.lookup at styles of
+ Just (styleName, _) -> return $
+ inTags False "text:span" [("text:style-name",styleName)] d
+ Nothing -> do
+ let styleName = "T" ++ show (Map.size styles + 1)
+ addTextStyle at (styleName,
+ inTags False "style:style"
+ [("style:name", styleName)
+ ,("style:family", "text")]
+ $ selfClosingTag "style:text-properties"
+ (concatMap textStyleAttr (Set.toList at)))
+ return $ inTags False
+ "text:span" [("text:style-name",styleName)] d
+
+inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc
+inHeaderTags i d =
+ return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
+ , ("text:outline-level", show i)] d
+
+inQuotes :: QuoteType -> Doc -> Doc
+inQuotes SingleQuote s = char '\8216' <> s <> char '\8217'
+inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221'
+
+handleSpaces :: String -> Doc
+handleSpaces s
+ | ( ' ':_) <- s = genTag s
+ | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x
+ | otherwise = rm s
+ where
+ genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>)
+ tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)]
+ rm ( ' ':xs) = char ' ' <> genTag xs
+ rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs
+ rm ( x:xs) = char x <> rm xs
+ rm [] = empty
+
+-- | Convert Pandoc document to string in OpenDocument format.
+writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeOpenDocument opts (Pandoc meta blocks) = do
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let render' = render colwidth
+ ((body, metadata),s) <- flip runStateT
+ defaultWriterState $ do
+ m <- metaToJSON opts
+ (fmap (render colwidth) . blocksToOpenDocument opts)
+ (fmap (render colwidth) . inlinesToOpenDocument opts)
+ meta
+ b <- render' `fmap` blocksToOpenDocument opts blocks
+ return (b, m)
+ let styles = stTableStyles s ++ stParaStyles s ++
+ map snd (reverse $ sortBy (comparing fst) $
+ Map.elems (stTextStyles s))
+ listStyle (n,l) = inTags True "text:list-style"
+ [("style:name", "L" ++ show n)] (vcat l)
+ let listStyles = map listStyle (stListStyles s)
+ let automaticStyles = vcat $ reverse $ styles ++ listStyles
+ let context = defField "body" body
+ $ defField "automatic-styles" (render' automaticStyles)
+ $ metadata
+ return $ case writerTemplate opts of
+ Nothing -> body
+ Just tpl -> renderTemplate' tpl context
+
+withParagraphStyle :: PandocMonad m
+ => WriterOptions -> String -> [Block] -> OD m Doc
+withParagraphStyle o s (b:bs)
+ | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l
+ | otherwise = go =<< blockToOpenDocument o b
+ where go i = (<>) i <$> withParagraphStyle o s bs
+withParagraphStyle _ _ [] = return empty
+
+inPreformattedTags :: PandocMonad m => String -> OD m Doc
+inPreformattedTags s = do
+ n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")]
+ return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s
+
+orderedListToOpenDocument :: PandocMonad m
+ => WriterOptions -> Int -> [[Block]] -> OD m Doc
+orderedListToOpenDocument o pn bs =
+ vcat . map (inTagsIndented "text:list-item") <$>
+ mapM (orderedItemToOpenDocument o pn . map plainToPara) bs
+
+orderedItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> Int -> [Block] -> OD m Doc
+orderedItemToOpenDocument o n (b:bs)
+ | OrderedList a l <- b = newLevel a l
+ | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l
+ | otherwise = go =<< blockToOpenDocument o b
+ where
+ go i = ($$) i <$> orderedItemToOpenDocument o n bs
+ newLevel a l = do
+ nn <- length <$> gets stParaStyles
+ ls <- head <$> gets stListStyles
+ modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) }
+ inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l
+orderedItemToOpenDocument _ _ [] = return empty
+
+isTightList :: [[Block]] -> Bool
+isTightList [] = False
+isTightList (b:_)
+ | Plain {} : _ <- b = True
+ | otherwise = False
+
+newOrderedListStyle :: PandocMonad m
+ => Bool -> ListAttributes -> OD m (Int,Int)
+newOrderedListStyle b a = do
+ ln <- (+) 1 . length <$> gets stListStyles
+ let nbs = orderedListLevelStyle a (ln, [])
+ pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln
+ modify $ \s -> s { stListStyles = nbs : stListStyles s }
+ return (ln,pn)
+
+bulletListToOpenDocument :: PandocMonad m
+ => WriterOptions -> [[Block]] -> OD m Doc
+bulletListToOpenDocument o b = do
+ ln <- (+) 1 . length <$> gets stListStyles
+ (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln
+ modify $ \s -> s { stListStyles = ns : stListStyles s }
+ is <- listItemsToOpenDocument ("P" ++ show pn) o b
+ return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is
+
+listItemsToOpenDocument :: PandocMonad m
+ => String -> WriterOptions -> [[Block]] -> OD m Doc
+listItemsToOpenDocument s o is =
+ vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is
+
+deflistItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc
+deflistItemToOpenDocument o (t,d) = do
+ let ts = if isTightList d
+ then "Definition_20_Term_20_Tight" else "Definition_20_Term"
+ ds = if isTightList d
+ then "Definition_20_Definition_20_Tight" else "Definition_20_Definition"
+ t' <- withParagraphStyle o ts [Para t]
+ d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d
+ return $ t' $$ d'
+
+inBlockQuote :: PandocMonad m
+ => WriterOptions -> Int -> [Block] -> OD m Doc
+inBlockQuote o i (b:bs)
+ | BlockQuote l <- b = do increaseIndent
+ ni <- paraStyle
+ [("style:parent-style-name","Quotations")]
+ go =<< inBlockQuote o ni (map plainToPara l)
+ | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l
+ | otherwise = do go =<< blockToOpenDocument o b
+ where go block = ($$) block <$> inBlockQuote o i bs
+inBlockQuote _ _ [] = resetIndent >> return empty
+
+-- | Convert a list of Pandoc blocks to OpenDocument.
+blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc
+blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
+
+-- | Convert a Pandoc block element to OpenDocument.
+blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc
+blockToOpenDocument o bs
+ | Plain b <- bs = if null b
+ then return empty
+ else inParagraphTags =<< inlinesToOpenDocument o b
+ | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs
+ = figure attr c s t
+ | Para b <- bs = if null b
+ then return empty
+ else inParagraphTags =<< inlinesToOpenDocument o b
+ | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b
+ | Div _ xs <- bs = blocksToOpenDocument o xs
+ | Header i _ b <- bs = setFirstPara >>
+ (inHeaderTags i =<< inlinesToOpenDocument o b)
+ | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
+ | DefinitionList b <- bs = setFirstPara >> defList b
+ | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b
+ | OrderedList a b <- bs = setFirstPara >> orderedList a b
+ | CodeBlock _ s <- bs = setFirstPara >> preformatted s
+ | Table c a w h r <- bs = setFirstPara >> table c a w h r
+ | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p"
+ [ ("text:style-name", "Horizontal_20_Line") ])
+ | RawBlock f s <- bs = if f == Format "opendocument"
+ then return $ text s
+ else do
+ report $ BlockNotRendered bs
+ return empty
+ | Null <- bs = return empty
+ | otherwise = return empty
+ where
+ defList b = do setInDefinitionList True
+ r <- vcat <$> mapM (deflistItemToOpenDocument o) b
+ setInDefinitionList False
+ return r
+ preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s)
+ mkBlockQuote b = do increaseIndent
+ i <- paraStyle
+ [("style:parent-style-name","Quotations")]
+ inBlockQuote o i (map plainToPara b)
+ orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a
+ inTags True "text:list" [ ("text:style-name", "L" ++ show ln)]
+ <$> orderedListToOpenDocument o pn b
+ table c a w h r = do
+ tn <- length <$> gets stTableStyles
+ pn <- length <$> gets stParaStyles
+ let genIds = map chr [65..]
+ name = "Table" ++ show (tn + 1)
+ columnIds = zip genIds w
+ mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])]
+ columns = map mkColumn columnIds
+ paraHStyles = paraTableStyles "Heading" pn a
+ paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a
+ newPara = map snd . filter (not . isEmpty . snd)
+ addTableStyle $ tableStyle tn columnIds
+ mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
+ captionDoc <- if null c
+ then return empty
+ else withParagraphStyle o "Table" [Para c]
+ th <- if all null h
+ then return empty
+ else colHeadsToOpenDocument o name (map fst paraHStyles) h
+ tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r
+ return $ inTags True "table:table" [ ("table:name" , name)
+ , ("table:style-name", name)
+ ] (vcat columns $$ th $$ vcat tr) $$ captionDoc
+ figure attr caption source title | null caption =
+ withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]]
+ | otherwise = do
+ imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]]
+ captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
+ return $ imageDoc $$ captionDoc
+
+colHeadsToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> [String] -> [[Block]]
+ -> OD m Doc
+colHeadsToOpenDocument o tn ns hs =
+ inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$>
+ mapM (tableItemToOpenDocument o tn) (zip ns hs)
+
+tableRowToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> [String] -> [[Block]]
+ -> OD m Doc
+tableRowToOpenDocument o tn ns cs =
+ inTagsIndented "table:table-row" . vcat <$>
+ mapM (tableItemToOpenDocument o tn) (zip ns cs)
+
+tableItemToOpenDocument :: PandocMonad m
+ => WriterOptions -> String -> (String,[Block])
+ -> OD m Doc
+tableItemToOpenDocument o tn (n,i) =
+ let a = [ ("table:style-name" , tn ++ ".A1" )
+ , ("office:value-type", "string" )
+ ]
+ in inTags True "table:table-cell" a <$>
+ withParagraphStyle o n (map plainToPara i)
+
+-- | Convert a list of inline elements to OpenDocument.
+inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc
+inlinesToOpenDocument o l = hcat <$> toChunks o l
+
+toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc]
+toChunks _ [] = return []
+toChunks o (x : xs)
+ | isChunkable x = do
+ contents <- (inTextStyle . hcat) =<<
+ mapM (inlineToOpenDocument o) (x:ys)
+ rest <- toChunks o zs
+ return (contents : rest)
+ | otherwise = do
+ contents <- inlineToOpenDocument o x
+ rest <- toChunks o xs
+ return (contents : rest)
+ where (ys, zs) = span isChunkable xs
+
+isChunkable :: Inline -> Bool
+isChunkable (Str _) = True
+isChunkable Space = True
+isChunkable SoftBreak = True
+isChunkable _ = False
+
+-- | Convert an inline element to OpenDocument.
+inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc
+inlineToOpenDocument o ils
+ = case ils of
+ Space -> return space
+ SoftBreak
+ | writerWrapText o == WrapPreserve
+ -> return $ preformatted "\n"
+ | otherwise -> return $ space
+ Span _ xs -> inlinesToOpenDocument o xs
+ LineBreak -> return $ selfClosingTag "text:line-break" []
+ Str s -> return $ handleSpaces $ escapeStringForXML s
+ Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l
+ Strong l -> withTextStyle Bold $ inlinesToOpenDocument o l
+ Strikeout l -> withTextStyle Strike $ inlinesToOpenDocument o l
+ Superscript l -> withTextStyle Sup $ inlinesToOpenDocument o l
+ Subscript l -> withTextStyle Sub $ inlinesToOpenDocument o l
+ SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l
+ Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l
+ Code _ s -> inlinedCode $ preformatted s
+ Math t s -> lift (texMathToInlines t s) >>=
+ inlinesToOpenDocument o
+ Cite _ l -> inlinesToOpenDocument o l
+ RawInline f s -> if f == Format "opendocument"
+ then return $ text s
+ else do
+ report $ InlineNotRendered ils
+ return empty
+ Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l
+ Image attr _ (s,t) -> mkImg attr s t
+ Note l -> mkNote l
+ where
+ preformatted s = handleSpaces $ escapeStringForXML s
+ inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s
+ mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple")
+ , ("xlink:href" , s )
+ , ("office:name", t )
+ ] . inSpanTags "Definition"
+ mkImg (_, _, kvs) s _ = do
+ id' <- gets stImageId
+ modify (\st -> st{ stImageId = id' + 1 })
+ let getDims [] = []
+ getDims (("width", w) :xs) = ("svg:width", w) : getDims xs
+ getDims (("height", h):xs) = ("svg:height", h) : getDims xs
+ getDims (x@("style:rel-width", _) :xs) = x : getDims xs
+ getDims (x@("style:rel-height", _):xs) = x : getDims xs
+ getDims (_:xs) = getDims xs
+ return $ inTags False "draw:frame"
+ (("draw:name", "img" ++ show id') : getDims kvs) $
+ selfClosingTag "draw:image" [ ("xlink:href" , s )
+ , ("xlink:type" , "simple")
+ , ("xlink:show" , "embed" )
+ , ("xlink:actuate", "onLoad")]
+ mkNote l = do
+ n <- length <$> gets stNotes
+ let footNote t = inTags False "text:note"
+ [ ("text:id" , "ftn" ++ show n)
+ , ("text:note-class", "footnote" )] $
+ inTagsSimple "text:note-citation" (text . show $ n + 1) <>
+ inTagsSimple "text:note-body" t
+ nn <- footNote <$> withParagraphStyle o "Footnote" l
+ addNote nn
+ return nn
+
+bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc]))
+bulletListStyle l = do
+ let doStyles i = inTags True "text:list-level-style-bullet"
+ [ ("text:level" , show (i + 1) )
+ , ("text:style-name" , "Bullet_20_Symbols")
+ , ("style:num-suffix", "." )
+ , ("text:bullet-char", [bulletList !! i] )
+ ] (listLevelStyle (1 + i))
+ bulletList = map chr $ cycle [8226,8227,8259]
+ listElStyle = map doStyles [0..9]
+ pn <- paraListStyle l
+ return (pn, (l, listElStyle))
+
+orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc])
+orderedListLevelStyle (s,n, d) (l,ls) =
+ let suffix = case d of
+ OneParen -> [("style:num-suffix", ")")]
+ TwoParens -> [("style:num-prefix", "(")
+ ,("style:num-suffix", ")")]
+ _ -> [("style:num-suffix", ".")]
+ format = case n of
+ UpperAlpha -> "A"
+ LowerAlpha -> "a"
+ UpperRoman -> "I"
+ LowerRoman -> "i"
+ _ -> "1"
+ listStyle = inTags True "text:list-level-style-number"
+ ([ ("text:level" , show $ 1 + length ls )
+ , ("text:style-name" , "Numbering_20_Symbols")
+ , ("style:num-format", format )
+ , ("text:start-value", show s )
+ ] ++ suffix) (listLevelStyle (1 + length ls))
+ in (l, ls ++ [listStyle])
+
+listLevelStyle :: Int -> Doc
+listLevelStyle i =
+ let indent = show (0.25 * fromIntegral i :: Double) in
+ selfClosingTag "style:list-level-properties"
+ [ ("text:space-before" , indent ++ "in")
+ , ("text:min-label-width", "0.25in")]
+
+tableStyle :: Int -> [(Char,Double)] -> Doc
+tableStyle num wcs =
+ let tableId = "Table" ++ show (num + 1)
+ table = inTags True "style:style"
+ [("style:name", tableId)
+ ,("style:family", "table")] $
+ selfClosingTag "style:table-properties"
+ [("table:align" , "center")]
+ colStyle (c,0) = selfClosingTag "style:style"
+ [ ("style:name" , tableId ++ "." ++ [c])
+ , ("style:family", "table-column" )]
+ colStyle (c,w) = inTags True "style:style"
+ [ ("style:name" , tableId ++ "." ++ [c])
+ , ("style:family", "table-column" )] $
+ selfClosingTag "style:table-column-properties"
+ [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))]
+ cellStyle = inTags True "style:style"
+ [ ("style:name" , tableId ++ ".A1")
+ , ("style:family", "table-cell" )] $
+ selfClosingTag "style:table-cell-properties"
+ [ ("fo:border", "none")]
+ columnStyles = map colStyle wcs
+ in table $$ vcat columnStyles $$ cellStyle
+
+paraStyle :: PandocMonad m => [(String,String)] -> OD m Int
+paraStyle attrs = do
+ pn <- (+) 1 . length <$> gets stParaStyles
+ i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara
+ b <- gets stInDefinition
+ t <- gets stTight
+ let styleAttr = [ ("style:name" , "P" ++ show pn)
+ , ("style:family" , "paragraph" )]
+ indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i
+ tight = if t then [ ("fo:margin-top" , "0in" )
+ , ("fo:margin-bottom" , "0in" )]
+ else []
+ indent = if (i /= 0 || b)
+ then [ ("fo:margin-left" , indentVal)
+ , ("fo:margin-right" , "0in" )
+ , ("fo:text-indent" , "0in" )
+ , ("style:auto-text-indent" , "false" )]
+ else []
+ attributes = indent ++ tight
+ paraProps = when (not $ null attributes) $
+ selfClosingTag "style:paragraph-properties" attributes
+ addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps
+ return pn
+
+paraListStyle :: PandocMonad m => Int -> OD m Int
+paraListStyle l = paraStyle
+ [("style:parent-style-name","Text_20_body")
+ ,("style:list-style-name", "L" ++ show l )]
+
+paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)]
+paraTableStyles _ _ [] = []
+paraTableStyles t s (a:xs)
+ | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs
+ | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs
+ | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs
+ where pName sn = "P" ++ show (sn + 1)
+ res sn x = inTags True "style:style"
+ [ ("style:name" , pName sn )
+ , ("style:family" , "paragraph" )
+ , ("style:parent-style-name", "Table_20_" ++ t)] $
+ selfClosingTag "style:paragraph-properties"
+ [ ("fo:text-align", x)
+ , ("style:justify-single-word", "false")]
+
+data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
+ deriving ( Eq,Ord )
+
+textStyleAttr :: TextStyle -> [(String,String)]
+textStyleAttr s
+ | Italic <- s = [("fo:font-style" ,"italic" )
+ ,("style:font-style-asian" ,"italic" )
+ ,("style:font-style-complex" ,"italic" )]
+ | Bold <- s = [("fo:font-weight" ,"bold" )
+ ,("style:font-weight-asian" ,"bold" )
+ ,("style:font-weight-complex" ,"bold" )]
+ | Strike <- s = [("style:text-line-through-style", "solid" )]
+ | Sub <- s = [("style:text-position" ,"sub 58%" )]
+ | Sup <- s = [("style:text-position" ,"super 58%" )]
+ | SmallC <- s = [("fo:font-variant" ,"small-caps")]
+ | Pre <- s = [("style:font-name" ,"Courier New")
+ ,("style:font-name-asian" ,"Courier New")
+ ,("style:font-name-complex" ,"Courier New")]
+ | otherwise = []
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
new file mode 100644
index 000000000..55d3fe656
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -0,0 +1,411 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
+ Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>,
+ and John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Org
+ Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Puneeth Chaganti <punchagan@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Emacs Org-Mode.
+
+Org-Mode: <http://orgmode.org>
+-}
+module Text.Pandoc.Writers.Org ( writeOrg) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Pretty
+import Text.Pandoc.Templates (renderTemplate')
+import Data.Char ( isAlphaNum, toLower )
+import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose )
+import Control.Monad.State
+import Text.Pandoc.Class (PandocMonad)
+
+data WriterState =
+ WriterState { stNotes :: [[Block]]
+ , stHasMath :: Bool
+ , stOptions :: WriterOptions
+ }
+
+-- | Convert Pandoc to Org.
+writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeOrg opts document = return $
+ let st = WriterState { stNotes = [],
+ stHasMath = False,
+ stOptions = opts }
+ in evalState (pandocToOrg document) st
+
+-- | Return Org representation of document.
+pandocToOrg :: Pandoc -> State WriterState String
+pandocToOrg (Pandoc meta blocks) = do
+ opts <- liftM stOptions get
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToOrg)
+ (fmap (render colwidth) . inlineListToOrg)
+ meta
+ body <- blockListToOrg blocks
+ notes <- liftM (reverse . stNotes) get >>= notesToOrg
+ hasMath <- liftM stHasMath get
+ let main = render colwidth $ foldl ($+$) empty $ [body, notes]
+ let context = defField "body" main
+ $ defField "math" hasMath
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Return Org representation of notes.
+notesToOrg :: [[Block]] -> State WriterState Doc
+notesToOrg notes =
+ mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
+ return . vsep
+
+-- | Return Org representation of a note.
+noteToOrg :: Int -> [Block] -> State WriterState Doc
+noteToOrg num note = do
+ contents <- blockListToOrg note
+ let marker = "[fn:" ++ show num ++ "] "
+ return $ hang (length marker) (text marker) contents
+
+-- | Escape special characters for Org.
+escapeString :: String -> String
+escapeString = escapeStringUsing $
+ [ ('\x2014',"---")
+ , ('\x2013',"--")
+ , ('\x2019',"'")
+ , ('\x2026',"...")
+ ] ++ backslashEscapes "^_"
+
+isRawFormat :: Format -> Bool
+isRawFormat f =
+ f == Format "latex" || f == Format "tex" || f == Format "org"
+
+-- | Convert Pandoc block element to Org.
+blockToOrg :: Block -- ^ Block element
+ -> State WriterState Doc
+blockToOrg Null = return empty
+blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do
+ contents <- blockListToOrg bs
+ let drawerNameTag = ":" <> text cls <> ":"
+ let keys = vcat $ map (\(k,v) ->
+ ":" <> text k <> ":"
+ <> space <> text v) kvs
+ let drawerEndTag = text ":END:"
+ return $ drawerNameTag $$ cr $$ keys $$
+ blankline $$ contents $$
+ blankline $$ drawerEndTag $$
+ blankline
+blockToOrg (Div attrs bs) = do
+ contents <- blockListToOrg bs
+ let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower
+ return $ case attrs of
+ ("", [], []) ->
+ -- nullAttr, treat contents as if it wasn't wrapped
+ blankline $$ contents $$ blankline
+ (ident, [], []) ->
+ -- only an id: add id as an anchor, unwrap the rest
+ blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline
+ (ident, classes, kv) ->
+ -- if one class looks like the name of a greater block then output as
+ -- such: The ID, if present, is added via the #+NAME keyword; other
+ -- classes and key-value pairs are kept as #+ATTR_HTML attributes.
+ let
+ (blockTypeCand, classes') = partition isGreaterBlockClass classes
+ in case blockTypeCand of
+ (blockType:classes'') ->
+ blankline $$ attrHtml (ident, classes'' <> classes', kv) $$
+ "#+BEGIN_" <> text blockType $$ contents $$
+ "#+END_" <> text blockType $$ blankline
+ _ ->
+ -- fallback: wrap in div tags
+ let
+ startTag = tagWithAttrs "div" attrs
+ endTag = text "</div>"
+ in blankline $$ "#+BEGIN_HTML" $$
+ nest 2 startTag $$ "#+END_HTML" $$ blankline $$
+ contents $$ blankline $$ "#+BEGIN_HTML" $$
+ nest 2 endTag $$ "#+END_HTML" $$ blankline
+blockToOrg (Plain inlines) = inlineListToOrg inlines
+-- title beginning with fig: indicates that the image is a figure
+blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return empty
+ else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt
+ img <- inlineToOrg (Image attr txt (src,tit))
+ return $ capt $$ img $$ blankline
+blockToOrg (Para inlines) = do
+ contents <- inlineListToOrg inlines
+ return $ contents <> blankline
+blockToOrg (LineBlock lns) = do
+ let splitStanza [] = []
+ splitStanza xs = case break (== mempty) xs of
+ (l, []) -> l : []
+ (l, _:r) -> l : splitStanza r
+ let joinWithLinefeeds = nowrap . mconcat . intersperse cr
+ let joinWithBlankLines = mconcat . intersperse blankline
+ let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls
+ contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns)
+ return $ blankline $$ "#+BEGIN_VERSE" $$
+ nest 2 contents $$ "#+END_VERSE" <> blankline
+blockToOrg (RawBlock "html" str) =
+ return $ blankline $$ "#+BEGIN_HTML" $$
+ nest 2 (text str) $$ "#+END_HTML" $$ blankline
+blockToOrg (RawBlock f str) | isRawFormat f =
+ return $ text str
+blockToOrg (RawBlock _ _) = return empty
+blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
+blockToOrg (Header level attr inlines) = do
+ contents <- inlineListToOrg inlines
+ let headerStr = text $ if level > 999 then " " else replicate level '*'
+ let drawerStr = if attr == nullAttr
+ then empty
+ else cr <> nest (level + 1) (propertiesDrawer attr)
+ return $ headerStr <> " " <> contents <> drawerStr <> blankline
+blockToOrg (CodeBlock (_,classes,_) str) = do
+ opts <- stOptions <$> get
+ let tabstop = writerTabStop opts
+ let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers
+ let (beg, end) = case at of
+ [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE")
+ (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC")
+ return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline
+blockToOrg (BlockQuote blocks) = do
+ contents <- blockListToOrg blocks
+ return $ blankline $$ "#+BEGIN_QUOTE" $$
+ nest 2 contents $$ "#+END_QUOTE" $$ blankline
+blockToOrg (Table caption' _ _ headers rows) = do
+ caption'' <- inlineListToOrg caption'
+ let caption = if null caption'
+ then empty
+ else ("#+CAPTION: " <> caption'')
+ headers' <- mapM blockListToOrg headers
+ rawRows <- mapM (mapM blockListToOrg) rows
+ let numChars = maximum . map offset
+ -- FIXME: width is not being used.
+ let widthsInChars =
+ map ((+2) . numChars) $ transpose (headers' : rawRows)
+ -- FIXME: Org doesn't allow blocks with height more than 1.
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (1 : map height blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM blockListToOrg row
+ return $ makeRow cols) rows
+ let border ch = char '|' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '|'
+ let body = vcat rows'
+ let head'' = if all null headers
+ then empty
+ else head' $$ border '-'
+ return $ head'' $$ body $$ caption $$ blankline
+blockToOrg (BulletList items) = do
+ contents <- mapM bulletListItemToOrg items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $+$ vcat contents $$ blankline
+blockToOrg (OrderedList (start, _, delim) items) = do
+ let delim' = case delim of
+ TwoParens -> OneParen
+ x -> x
+ let markers = take (length items) $ orderedListMarkers
+ (start, Decimal, delim')
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ vcat contents $$ blankline
+blockToOrg (DefinitionList items) = do
+ contents <- mapM definitionListItemToOrg items
+ return $ vcat contents $$ blankline
+
+-- | Convert bullet list item (list of blocks) to Org.
+bulletListItemToOrg :: [Block] -> State WriterState Doc
+bulletListItemToOrg items = do
+ contents <- blockListToOrg items
+ return $ hang 2 "- " (contents <> cr)
+
+-- | Convert ordered list item (a list of blocks) to Org.
+orderedListItemToOrg :: String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToOrg marker items = do
+ contents <- blockListToOrg items
+ return $ hang (length marker + 1) (text marker <> space) (contents <> cr)
+
+-- | Convert defintion list item (label, list of blocks) to Org.
+definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc
+definitionListItemToOrg (label, defs) = do
+ label' <- inlineListToOrg label
+ contents <- liftM vcat $ mapM blockListToOrg defs
+ return $ hang 2 "- " $ label' <> " :: " <> (contents <> cr)
+
+-- | Convert list of key/value pairs to Org :PROPERTIES: drawer.
+propertiesDrawer :: Attr -> Doc
+propertiesDrawer (ident, classes, kv) =
+ let
+ drawerStart = text ":PROPERTIES:"
+ drawerEnd = text ":END:"
+ kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv
+ kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv'
+ properties = vcat $ map kvToOrgProperty kv''
+ in
+ drawerStart <> cr <> properties <> cr <> drawerEnd
+ where
+ kvToOrgProperty :: (String, String) -> Doc
+ kvToOrgProperty (key, value) =
+ text ":" <> text key <> text ": " <> text value <> cr
+
+attrHtml :: Attr -> Doc
+attrHtml ("" , [] , []) = mempty
+attrHtml (ident, classes, kvs) =
+ let
+ name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr
+ keyword = "#+ATTR_HTML"
+ classKv = ("class", unwords classes)
+ kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs)
+ in name <> keyword <> ": " <> text (unwords kvStrings) <> cr
+
+-- | Convert list of Pandoc block elements to Org.
+blockListToOrg :: [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to Org.
+inlineListToOrg :: [Inline] -> State WriterState Doc
+inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
+
+-- | Convert Pandoc inline element to Org.
+inlineToOrg :: Inline -> State WriterState Doc
+inlineToOrg (Span (uid, [], []) []) =
+ return $ "<<" <> text uid <> ">>"
+inlineToOrg (Span _ lst) =
+ inlineListToOrg lst
+inlineToOrg (Emph lst) = do
+ contents <- inlineListToOrg lst
+ return $ "/" <> contents <> "/"
+inlineToOrg (Strong lst) = do
+ contents <- inlineListToOrg lst
+ return $ "*" <> contents <> "*"
+inlineToOrg (Strikeout lst) = do
+ contents <- inlineListToOrg lst
+ return $ "+" <> contents <> "+"
+inlineToOrg (Superscript lst) = do
+ contents <- inlineListToOrg lst
+ return $ "^{" <> contents <> "}"
+inlineToOrg (Subscript lst) = do
+ contents <- inlineListToOrg lst
+ return $ "_{" <> contents <> "}"
+inlineToOrg (SmallCaps lst) = inlineListToOrg lst
+inlineToOrg (Quoted SingleQuote lst) = do
+ contents <- inlineListToOrg lst
+ return $ "'" <> contents <> "'"
+inlineToOrg (Quoted DoubleQuote lst) = do
+ contents <- inlineListToOrg lst
+ return $ "\"" <> contents <> "\""
+inlineToOrg (Cite _ lst) = inlineListToOrg lst
+inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
+inlineToOrg (Str str) = return $ text $ escapeString str
+inlineToOrg (Math t str) = do
+ modify $ \st -> st{ stHasMath = True }
+ return $ if t == InlineMath
+ then "$" <> text str <> "$"
+ else "$$" <> text str <> "$$"
+inlineToOrg (RawInline f@(Format f') str) =
+ return $ if isRawFormat f
+ then text str
+ else "@@" <> text f' <> ":" <> text str <> "@@"
+inlineToOrg LineBreak = return (text "\\\\" <> cr)
+inlineToOrg Space = return space
+inlineToOrg SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ case wrapText of
+ WrapPreserve -> return cr
+ WrapAuto -> return space
+ WrapNone -> return space
+inlineToOrg (Link _ txt (src, _)) = do
+ case txt of
+ [Str x] | escapeURI x == src -> -- autolink
+ do return $ "[[" <> text (orgPath x) <> "]]"
+ _ -> do contents <- inlineListToOrg txt
+ return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]"
+inlineToOrg (Image _ _ (source, _)) = do
+ return $ "[[" <> text (orgPath source) <> "]]"
+inlineToOrg (Note contents) = do
+ -- add to notes in state
+ notes <- get >>= (return . stNotes)
+ modify $ \st -> st { stNotes = contents:notes }
+ let ref = show $ (length notes) + 1
+ return $ "[fn:" <> text ref <> "]"
+
+orgPath :: String -> String
+orgPath src =
+ case src of
+ [] -> mempty -- wiki link
+ ('#':_) -> src -- internal link
+ _ | isUrl src -> src
+ _ | isFilePath src -> src
+ _ -> "file:" <> src
+ where
+ isFilePath :: String -> Bool
+ isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"]
+
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
+
+-- | Translate from pandoc's programming language identifiers to those used by
+-- org-mode.
+pandocLangToOrg :: String -> String
+pandocLangToOrg cs =
+ case cs of
+ "c" -> "C"
+ "cpp" -> "C++"
+ "commonlisp" -> "lisp"
+ "r" -> "R"
+ "bash" -> "sh"
+ _ -> cs
+
+-- | List of language identifiers recognized by org-mode.
+orgLangIdentifiers :: [String]
+orgLangIdentifiers =
+ [ "asymptote", "awk", "C", "C++", "clojure", "css", "d", "ditaa", "dot"
+ , "calc", "emacs-lisp", "fortran", "gnuplot", "haskell", "java", "js"
+ , "latex", "ledger", "lisp", "lilypond", "matlab", "mscgen", "ocaml"
+ , "octave", "org", "oz", "perl", "plantuml", "processing", "python", "R"
+ , "ruby", "sass", "scheme", "screen", "sed", "sh", "sql", "sqlite"
+ ]
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
new file mode 100644
index 000000000..5cce64d17
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -0,0 +1,556 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RST
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to reStructuredText.
+
+reStructuredText: <http://docutils.sourceforge.net/rst.html>
+-}
+module Text.Pandoc.Writers.RST ( writeRST ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Builder (deleteMeta)
+import Data.Maybe (fromMaybe)
+import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose )
+import Network.URI (isURI)
+import Text.Pandoc.Pretty
+import Control.Monad.State
+import Data.Char (isSpace, toLower)
+import Text.Pandoc.Class (PandocMonad)
+
+type Refs = [([Inline], Target)]
+
+data WriterState =
+ WriterState { stNotes :: [[Block]]
+ , stLinks :: Refs
+ , stImages :: [([Inline], (Attr, String, String, Maybe String))]
+ , stHasMath :: Bool
+ , stHasRawTeX :: Bool
+ , stOptions :: WriterOptions
+ , stTopLevel :: Bool
+ }
+
+-- | Convert Pandoc to RST.
+writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeRST opts document = return $
+ let st = WriterState { stNotes = [], stLinks = [],
+ stImages = [], stHasMath = False,
+ stHasRawTeX = False, stOptions = opts,
+ stTopLevel = True}
+ in evalState (pandocToRST document) st
+
+-- | Return RST representation of document.
+pandocToRST :: Pandoc -> State WriterState String
+pandocToRST (Pandoc meta blocks) = do
+ opts <- liftM stOptions get
+ let colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ let subtit = case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> xs
+ _ -> []
+ title <- titleToRST (docTitle meta) subtit
+ metadata <- metaToJSON opts
+ (fmap (render colwidth) . blockListToRST)
+ (fmap (trimr . render colwidth) . inlineListToRST)
+ $ deleteMeta "title" $ deleteMeta "subtitle" meta
+ body <- blockListToRST' True $ case writerTemplate opts of
+ Just _ -> normalizeHeadings 1 blocks
+ Nothing -> blocks
+ notes <- liftM (reverse . stNotes) get >>= notesToRST
+ -- note that the notes may contain refs, so we do them first
+ refs <- liftM (reverse . stLinks) get >>= refsToRST
+ pics <- liftM (reverse . stImages) get >>= pictRefsToRST
+ hasMath <- liftM stHasMath get
+ rawTeX <- liftM stHasRawTeX get
+ let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
+ let context = defField "body" main
+ $ defField "toc" (writerTableOfContents opts)
+ $ defField "toc-depth" (show $ writerTOCDepth opts)
+ $ defField "math" hasMath
+ $ defField "title" (render Nothing title :: String)
+ $ defField "math" hasMath
+ $ defField "rawtex" rawTeX
+ $ metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+ where
+ normalizeHeadings lev (Header l a i:bs) =
+ Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
+ where (cont,bs') = break (headerLtEq l) bs
+ headerLtEq level (Header l' _ _) = l' <= level
+ headerLtEq _ _ = False
+ normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs
+ normalizeHeadings _ [] = []
+
+-- | Return RST representation of reference key table.
+refsToRST :: Refs -> State WriterState Doc
+refsToRST refs = mapM keyToRST refs >>= return . vcat
+
+-- | Return RST representation of a reference key.
+keyToRST :: ([Inline], (String, String))
+ -> State WriterState Doc
+keyToRST (label, (src, _)) = do
+ label' <- inlineListToRST label
+ let label'' = if ':' `elem` ((render Nothing label') :: String)
+ then char '`' <> label' <> char '`'
+ else label'
+ return $ nowrap $ ".. _" <> label'' <> ": " <> text src
+
+-- | Return RST representation of notes.
+notesToRST :: [[Block]] -> State WriterState Doc
+notesToRST notes =
+ mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
+ return . vsep
+
+-- | Return RST representation of a note.
+noteToRST :: Int -> [Block] -> State WriterState Doc
+noteToRST num note = do
+ contents <- blockListToRST note
+ let marker = ".. [" <> text (show num) <> "]"
+ return $ nowrap $ marker $$ nest 3 contents
+
+-- | Return RST representation of picture reference table.
+pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))]
+ -> State WriterState Doc
+pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
+
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: ([Inline], (Attr, String, String, Maybe String))
+ -> State WriterState Doc
+pictToRST (label, (attr, src, _, mbtarget)) = do
+ label' <- inlineListToRST label
+ dims <- imageDimsToRST attr
+ let (_, cls, _) = attr
+ classes = if null cls
+ then empty
+ else ":class: " <> text (unwords cls)
+ return $ nowrap
+ $ ".. |" <> label' <> "| image:: " <> text src $$ hang 3 empty (classes $$ dims)
+ $$ case mbtarget of
+ Nothing -> empty
+ Just t -> " :target: " <> text t
+
+-- | Escape special characters for RST.
+escapeString :: WriterOptions -> String -> String
+escapeString _ [] = []
+escapeString opts (c:cs) =
+ case c of
+ _ | c `elem` ['\\','`','*','_','|'] -> '\\':c:escapeString opts cs
+ '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
+ '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
+ '-' | isEnabled Ext_smart opts ->
+ case cs of
+ '-':_ -> '\\':'-':escapeString opts cs
+ _ -> '-':escapeString opts cs
+ '.' | isEnabled Ext_smart opts ->
+ case cs of
+ '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest
+ _ -> '.':escapeString opts cs
+ _ -> c : escapeString opts cs
+
+titleToRST :: [Inline] -> [Inline] -> State WriterState Doc
+titleToRST [] _ = return empty
+titleToRST tit subtit = do
+ title <- inlineListToRST tit
+ subtitle <- inlineListToRST subtit
+ return $ bordered title '=' $$ bordered subtitle '-'
+
+bordered :: Doc -> Char -> Doc
+bordered contents c =
+ if len > 0
+ then border $$ contents $$ border
+ else empty
+ where len = offset contents
+ border = text (replicate len c)
+
+-- | Convert Pandoc block element to RST.
+blockToRST :: Block -- ^ Block element
+ -> State WriterState Doc
+blockToRST Null = return empty
+blockToRST (Div attr bs) = do
+ contents <- blockListToRST bs
+ let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr)
+ let endTag = ".. raw:: html" $+$ nest 3 "</div>"
+ return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline
+blockToRST (Plain inlines) = inlineListToRST inlines
+-- title beginning with fig: indicates that the image is a figure
+blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- inlineListToRST txt
+ dims <- imageDimsToRST attr
+ let fig = "figure:: " <> text src
+ alt = ":alt: " <> if null tit then capt else text tit
+ (_,cls,_) = attr
+ classes = if null cls
+ then empty
+ else ":figclass: " <> text (unwords cls)
+ return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline
+blockToRST (Para inlines)
+ | LineBreak `elem` inlines = do -- use line block if LineBreaks
+ linesToLineBlock $ splitBy (==LineBreak) inlines
+ | otherwise = do
+ contents <- inlineListToRST inlines
+ return $ contents <> blankline
+blockToRST (LineBlock lns) =
+ linesToLineBlock lns
+blockToRST (RawBlock f@(Format f') str)
+ | f == "rst" = return $ text str
+ | otherwise = return $ blankline <> ".. raw:: " <>
+ text (map toLower f') $+$
+ (nest 3 $ text str) $$ blankline
+blockToRST HorizontalRule =
+ return $ blankline $$ "--------------" $$ blankline
+blockToRST (Header level (name,classes,_) inlines) = do
+ contents <- inlineListToRST inlines
+ isTopLevel <- gets stTopLevel
+ if isTopLevel
+ then do
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate (offset contents) headerChar
+ return $ nowrap $ contents $$ border $$ blankline
+ else do
+ let rub = "rubric:: " <> contents
+ let name' | null name = empty
+ | otherwise = ":name: " <> text name
+ let cls | null classes = empty
+ | otherwise = ":class: " <> text (unwords classes)
+ return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline
+blockToRST (CodeBlock (_,classes,kvs) str) = do
+ opts <- stOptions <$> get
+ let tabstop = writerTabStop opts
+ let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs
+ let numberlines = if "numberLines" `elem` classes
+ then " :number-lines:" <> startnum
+ else empty
+ if "haskell" `elem` classes && "literate" `elem` classes &&
+ isEnabled Ext_literate_haskell opts
+ then return $ prefixed "> " (text str) $$ blankline
+ else return $
+ (case [c | c <- classes,
+ c `notElem` ["sourceCode","literate","numberLines"]] of
+ [] -> "::"
+ (lang:_) -> (".. code:: " <> text lang) $$ numberlines)
+ $+$ nest tabstop (text str) $$ blankline
+blockToRST (BlockQuote blocks) = do
+ tabstop <- get >>= (return . writerTabStop . stOptions)
+ contents <- blockListToRST blocks
+ return $ (nest tabstop contents) <> blankline
+blockToRST (Table caption _ widths headers rows) = do
+ caption' <- inlineListToRST caption
+ headers' <- mapM blockListToRST headers
+ rawRows <- mapM (mapM blockListToRST) rows
+ -- let isSimpleCell [Plain _] = True
+ -- isSimpleCell [Para _] = True
+ -- isSimpleCell [] = True
+ -- isSimpleCell _ = False
+ -- let isSimple = all (==0) widths && all (all isSimpleCell) rows
+ let numChars = maximum . map offset
+ opts <- get >>= return . stOptions
+ let widthsInChars =
+ if all (== 0) widths
+ then map ((+2) . numChars) $ transpose (headers' : rawRows)
+ else map (floor . (fromIntegral (writerColumns opts) *)) widths
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = height (hcat blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow headers'
+ let rows' = map makeRow rawRows
+ let border ch = char '+' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '+'
+ let body = vcat $ intersperse (border '-') rows'
+ let head'' = if all null headers
+ then empty
+ else head' $$ border '='
+ let tbl = border '-' $$ head'' $$ body $$ border '-'
+ return $ if null caption
+ then tbl $$ blankline
+ else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$
+ blankline
+blockToRST (BulletList items) = do
+ contents <- mapM bulletListItemToRST items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ chomp (vcat contents) $$ blankline
+blockToRST (OrderedList (start, style', delim) items) = do
+ let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
+ then take (length items) $ repeat "#."
+ else take (length items) $ orderedListMarkers
+ (start, style', delim)
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ chomp (vcat contents) $$ blankline
+blockToRST (DefinitionList items) = do
+ contents <- mapM definitionListItemToRST items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ chomp (vcat contents) $$ blankline
+
+-- | Convert bullet list item (list of blocks) to RST.
+bulletListItemToRST :: [Block] -> State WriterState Doc
+bulletListItemToRST items = do
+ contents <- blockListToRST items
+ return $ hang 3 "- " $ contents <> cr
+
+-- | Convert ordered list item (a list of blocks) to RST.
+orderedListItemToRST :: String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToRST marker items = do
+ contents <- blockListToRST items
+ let marker' = marker ++ " "
+ return $ hang (length marker') (text marker') $ contents <> cr
+
+-- | Convert defintion list item (label, list of blocks) to RST.
+definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc
+definitionListItemToRST (label, defs) = do
+ label' <- inlineListToRST label
+ contents <- liftM vcat $ mapM blockListToRST defs
+ tabstop <- get >>= (return . writerTabStop . stOptions)
+ return $ label' $$ nest tabstop (nestle contents <> cr)
+
+-- | Format a list of lines as line block.
+linesToLineBlock :: [[Inline]] -> State WriterState Doc
+linesToLineBlock inlineLines = do
+ lns <- mapM inlineListToRST inlineLines
+ return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline
+
+-- | Convert list of Pandoc block elements to RST.
+blockListToRST' :: Bool
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST' topLevel blocks = do
+ tl <- gets stTopLevel
+ modify (\s->s{stTopLevel=topLevel})
+ res <- vcat `fmap` mapM blockToRST blocks
+ modify (\s->s{stTopLevel=tl})
+ return res
+
+blockListToRST :: [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST = blockListToRST' False
+
+-- | Convert list of Pandoc inline elements to RST.
+inlineListToRST :: [Inline] -> State WriterState Doc
+inlineListToRST lst =
+ mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
+ return . hcat
+ where -- remove spaces after displaymath, as they screw up indentation:
+ removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
+ Math DisplayMath x : dropWhile (==Space) zs
+ removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs
+ removeSpaceAfterDisplayMath [] = []
+ insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
+ insertBS (x:y:z:zs)
+ | isComplex y && (surroundComplex x z) =
+ x : y : insertBS (z : zs)
+ insertBS (x:y:zs)
+ | isComplex x && not (okAfterComplex y) =
+ x : RawInline "rst" "\\ " : insertBS (y : zs)
+ | isComplex y && not (okBeforeComplex x) =
+ x : RawInline "rst" "\\ " : insertBS (y : zs)
+ | otherwise =
+ x : insertBS (y : zs)
+ insertBS (x:ys) = x : insertBS ys
+ insertBS [] = []
+ surroundComplex :: Inline -> Inline -> Bool
+ surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =
+ case (last s, head s') of
+ ('\'','\'') -> True
+ ('"','"') -> True
+ ('<','>') -> True
+ ('[',']') -> True
+ ('{','}') -> True
+ _ -> False
+ surroundComplex _ _ = False
+ okAfterComplex :: Inline -> Bool
+ okAfterComplex Space = True
+ okAfterComplex SoftBreak = True
+ okAfterComplex LineBreak = True
+ okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String)
+ okAfterComplex _ = False
+ okBeforeComplex :: Inline -> Bool
+ okBeforeComplex Space = True
+ okBeforeComplex SoftBreak = True
+ okBeforeComplex LineBreak = True
+ okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String)
+ okBeforeComplex _ = False
+ isComplex :: Inline -> Bool
+ isComplex (Emph _) = True
+ isComplex (Strong _) = True
+ isComplex (SmallCaps _) = True
+ isComplex (Strikeout _) = True
+ isComplex (Superscript _) = True
+ isComplex (Subscript _) = True
+ isComplex (Link _ _ _) = True
+ isComplex (Image _ _ _) = True
+ isComplex (Code _ _) = True
+ isComplex (Math _ _) = True
+ isComplex (Cite _ (x:_)) = isComplex x
+ isComplex (Span _ (x:_)) = isComplex x
+ isComplex _ = False
+
+-- | Convert Pandoc inline element to RST.
+inlineToRST :: Inline -> State WriterState Doc
+inlineToRST (Span _ ils) = inlineListToRST ils
+inlineToRST (Emph lst) = do
+ contents <- inlineListToRST lst
+ return $ "*" <> contents <> "*"
+inlineToRST (Strong lst) = do
+ contents <- inlineListToRST lst
+ return $ "**" <> contents <> "**"
+inlineToRST (Strikeout lst) = do
+ contents <- inlineListToRST lst
+ return $ "[STRIKEOUT:" <> contents <> "]"
+inlineToRST (Superscript lst) = do
+ contents <- inlineListToRST lst
+ return $ ":sup:`" <> contents <> "`"
+inlineToRST (Subscript lst) = do
+ contents <- inlineListToRST lst
+ return $ ":sub:`" <> contents <> "`"
+inlineToRST (SmallCaps lst) = inlineListToRST lst
+inlineToRST (Quoted SingleQuote lst) = do
+ contents <- inlineListToRST lst
+ opts <- gets stOptions
+ if isEnabled Ext_smart opts
+ then return $ "'" <> contents <> "'"
+ else return $ "‘" <> contents <> "’"
+inlineToRST (Quoted DoubleQuote lst) = do
+ contents <- inlineListToRST lst
+ opts <- gets stOptions
+ if isEnabled Ext_smart opts
+ then return $ "\"" <> contents <> "\""
+ else return $ "“" <> contents <> "”"
+inlineToRST (Cite _ lst) =
+ inlineListToRST lst
+inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
+inlineToRST (Str str) = do
+ opts <- gets stOptions
+ return $ text $
+ (if isEnabled Ext_smart opts
+ then unsmartify opts
+ else id) $ escapeString opts str
+inlineToRST (Math t str) = do
+ modify $ \st -> st{ stHasMath = True }
+ return $ if t == InlineMath
+ then ":math:`" <> text str <> "`"
+ else if '\n' `elem` str
+ then blankline $$ ".. math::" $$
+ blankline $$ nest 3 (text str) $$ blankline
+ else blankline $$ (".. math:: " <> text str) $$ blankline
+inlineToRST (RawInline f x)
+ | f == "rst" = return $ text x
+ | f == "latex" || f == "tex" = do
+ modify $ \st -> st{ stHasRawTeX = True }
+ return $ ":raw-latex:`" <> text x <> "`"
+ | otherwise = return empty
+inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
+inlineToRST Space = return space
+inlineToRST SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ case wrapText of
+ WrapPreserve -> return cr
+ WrapAuto -> return space
+ WrapNone -> return space
+-- autolink
+inlineToRST (Link _ [Str str] (src, _))
+ | isURI src &&
+ if "mailto:" `isPrefixOf` src
+ then src == escapeURI ("mailto:" ++ str)
+ else src == escapeURI str = do
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
+ return $ text srcSuffix
+inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do
+ label <- registerImage attr alt (imgsrc,imgtit) (Just src)
+ return $ "|" <> label <> "|"
+inlineToRST (Link _ txt (src, tit)) = do
+ useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
+ linktext <- inlineListToRST $ normalizeSpaces txt
+ if useReferenceLinks
+ then do refs <- get >>= return . stLinks
+ case lookup txt refs of
+ Just (src',tit') ->
+ if src == src' && tit == tit'
+ then return $ "`" <> linktext <> "`_"
+ else do -- duplicate label, use non-reference link
+ return $ "`" <> linktext <> " <" <> text src <> ">`__"
+ Nothing -> do
+ modify $ \st -> st { stLinks = (txt,(src,tit)):refs }
+ return $ "`" <> linktext <> "`_"
+ else return $ "`" <> linktext <> " <" <> text src <> ">`__"
+inlineToRST (Image attr alternate (source, tit)) = do
+ label <- registerImage attr alternate (source,tit) Nothing
+ return $ "|" <> label <> "|"
+inlineToRST (Note contents) = do
+ -- add to notes in state
+ notes <- gets stNotes
+ modify $ \st -> st { stNotes = contents:notes }
+ let ref = show $ (length notes) + 1
+ return $ " [" <> text ref <> "]_"
+
+registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc
+registerImage attr alt (src,tit) mbtarget = do
+ pics <- get >>= return . stImages
+ txt <- case lookup alt pics of
+ Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget)
+ -> return alt
+ _ -> do
+ let alt' = if null alt || alt == [Str ""]
+ then [Str $ "image" ++ show (length pics)]
+ else alt
+ modify $ \st -> st { stImages =
+ (alt', (attr,src,tit, mbtarget)):stImages st }
+ return alt'
+ inlineListToRST txt
+
+imageDimsToRST :: Attr -> State WriterState Doc
+imageDimsToRST attr = do
+ let (ident, _, _) = attr
+ name = if null ident
+ then empty
+ else ":name: " <> text ident
+ showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d)
+ in case (dimension dir attr) of
+ Just (Percent a) ->
+ case dir of
+ Height -> empty
+ Width -> cols (Percent a)
+ Just dim -> cols dim
+ Nothing -> empty
+ return $ cr <> name $$ showDim Width $$ showDim Height
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
new file mode 100644
index 000000000..ef012e58e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -0,0 +1,412 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RTF
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to RTF (rich text format).
+-}
+module Text.Pandoc.Writers.RTF ( writeRTF
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Walk
+import Text.Pandoc.Logging
+import Data.List ( isSuffixOf, intercalate )
+import Data.Char ( ord, chr, isDigit )
+import qualified Data.ByteString as B
+import qualified Data.Map as M
+import Text.Printf ( printf )
+import Text.Pandoc.ImageSize
+import Control.Monad.Except (throwError, runExceptT, lift)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import qualified Text.Pandoc.Class as P
+
+-- | Convert Image inlines into a raw RTF embedded image, read from a file,
+-- or a MediaBag, or the internet.
+-- If file not found or filetype not jpeg or png, leave the inline unchanged.
+rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline
+rtfEmbedImage opts x@(Image attr _ (src,_)) = do
+ result <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src
+ case result of
+ Right (imgdata, Just mime)
+ | mime == "image/jpeg" || mime == "image/png" -> do
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ filetype <- case mime of
+ "image/jpeg" -> return "\\jpegblip"
+ "image/png" -> return "\\pngblip"
+ _ -> throwError $ PandocSomeError "Unknown file type"
+ sizeSpec <- case imageSize imgdata of
+ Left msg -> do
+ report $ CouldNotDetermineImageSize src msg
+ return ""
+ Right sz -> return $ "\\picw" ++ show xpx ++
+ "\\pich" ++ show ypx ++
+ "\\picwgoal" ++ show (floor (xpt * 20) :: Integer)
+ ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer)
+ -- twip = 1/1440in = 1/20pt
+ where (xpx, ypx) = sizeInPixels sz
+ (xpt, ypt) = desiredSizeInPoints opts attr sz
+ let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++
+ concat bytes ++ "}"
+ if B.null imgdata
+ then do
+ report $ CouldNotFetchResource src "image contained no data"
+ return x
+ else return $ RawInline (Format "rtf") raw
+ | otherwise -> do
+ report $ CouldNotFetchResource src "image is not a jpeg or png"
+ return x
+ Right (_, Nothing) -> do
+ report $ CouldNotDetermineMimeType src
+ return x
+ Left ( e :: PandocError ) -> do
+ report $ CouldNotFetchResource src (show e)
+ return x
+rtfEmbedImage _ x = return x
+
+-- | Convert Pandoc to a string in rich text format.
+writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeRTF options doc = do
+ -- handle images
+ Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc
+ let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta
+ let toPlain (MetaBlocks [Para ils]) = MetaInlines ils
+ toPlain x = x
+ -- adjust title, author, date so we don't get para inside para
+ let meta' = Meta $ M.adjust toPlain "title"
+ . M.adjust toPlain "author"
+ . M.adjust toPlain "date"
+ $ metamap
+ metadata <- metaToJSON options
+ (fmap concat . mapM (blockToRTF 0 AlignDefault))
+ (inlinesToRTF)
+ meta'
+ body <- blocksToRTF 0 AlignDefault blocks
+ let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options
+ isTOCHeader _ = False
+ toc <- tableOfContents $ filter isTOCHeader blocks
+ let context = defField "body" body
+ $ defField "spacer" spacer
+ $ (if writerTableOfContents options
+ then defField "toc" toc
+ else id)
+ $ metadata
+ return $ case writerTemplate options of
+ Just tpl -> renderTemplate' tpl context
+ Nothing -> case reverse body of
+ ('\n':_) -> body
+ _ -> body ++ "\n"
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: PandocMonad m => [Block] -> m String
+tableOfContents headers = do
+ let contents = map elementToListItem $ hierarchicalize headers
+ blocksToRTF 0 AlignDefault $
+ [Header 1 nullAttr [Str "Contents"], BulletList contents]
+
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++
+ if null subsecs
+ then []
+ else [BulletList (map elementToListItem subsecs)]
+
+-- | Convert unicode characters (> 127) into rich text format representation.
+handleUnicode :: String -> String
+handleUnicode [] = []
+handleUnicode (c:cs) =
+ if ord c > 127
+ then if surrogate c
+ then let x = ord c - 0x10000
+ (q, r) = x `divMod` 0x400
+ upper = q + 0xd800
+ lower = r + 0xDC00
+ in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs
+ else enc c ++ handleUnicode cs
+ else c:(handleUnicode cs)
+ where
+ surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff)
+ || (0xe000 <= ord x && ord x <= 0xffff) )
+ enc x = '\\':'u':(show (ord x)) ++ "?"
+
+-- | Escape special characters.
+escapeSpecial :: String -> String
+escapeSpecial = escapeStringUsing $
+ [ ('\t',"\\tab ")
+ , ('\8216',"\\u8216'")
+ , ('\8217',"\\u8217'")
+ , ('\8220',"\\u8220\"")
+ , ('\8221',"\\u8221\"")
+ , ('\8211',"\\u8211-")
+ , ('\8212',"\\u8212-")
+ ] ++ backslashEscapes "{\\}"
+
+-- | Escape strings as needed for rich text format.
+stringToRTF :: String -> String
+stringToRTF = handleUnicode . escapeSpecial
+
+-- | Escape things as needed for code block in RTF.
+codeStringToRTF :: String -> String
+codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str)
+
+-- | Make a paragraph with first-line indent, block indent, and space after.
+rtfParSpaced :: Int -- ^ space after (in twips)
+ -> Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent alignment content =
+ let alignString = case alignment of
+ AlignLeft -> "\\ql "
+ AlignRight -> "\\qr "
+ AlignCenter -> "\\qc "
+ AlignDefault -> "\\ql "
+ in "{\\pard " ++ alignString ++
+ "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+
+-- | Default paragraph.
+rtfPar :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfPar = rtfParSpaced 180
+
+-- | Compact paragraph (e.g. for compact list items).
+rtfCompact :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfCompact = rtfParSpaced 0
+
+-- number of twips to indent
+indentIncrement :: Int
+indentIncrement = 720
+
+listIncrement :: Int
+listIncrement = 360
+
+-- | Returns appropriate bullet list marker for indent level.
+bulletMarker :: Int -> String
+bulletMarker indent = case indent `mod` 720 of
+ 0 -> "\\bullet "
+ _ -> "\\endash "
+
+-- | Returns appropriate (list of) ordered list markers for indent level.
+orderedMarkers :: Int -> ListAttributes -> [String]
+orderedMarkers indent (start, style, delim) =
+ if style == DefaultStyle && delim == DefaultDelim
+ then case indent `mod` 720 of
+ 0 -> orderedListMarkers (start, Decimal, Period)
+ _ -> orderedListMarkers (start, LowerAlpha, Period)
+ else orderedListMarkers (start, style, delim)
+
+blocksToRTF :: PandocMonad m
+ => Int
+ -> Alignment
+ -> [Block]
+ -> m String
+blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align)
+
+-- | Convert Pandoc block element to RTF.
+blockToRTF :: PandocMonad m
+ => Int -- ^ indent level
+ -> Alignment -- ^ alignment
+ -> Block -- ^ block to convert
+ -> m String
+blockToRTF _ _ Null = return ""
+blockToRTF indent alignment (Div _ bs) =
+ blocksToRTF indent alignment bs
+blockToRTF indent alignment (Plain lst) =
+ rtfCompact indent 0 alignment <$> inlinesToRTF lst
+blockToRTF indent alignment (Para lst) =
+ rtfPar indent 0 alignment <$> inlinesToRTF lst
+blockToRTF indent alignment (LineBlock lns) =
+ blockToRTF indent alignment $ linesToPara lns
+blockToRTF indent alignment (BlockQuote lst) =
+ blocksToRTF (indent + indentIncrement) alignment lst
+blockToRTF indent _ (CodeBlock _ str) =
+ return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
+blockToRTF _ _ b@(RawBlock f str)
+ | f == Format "rtf" = return str
+ | otherwise = do
+ report $ BlockNotRendered b
+ return ""
+blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$>
+ mapM (listItemToRTF alignment indent (bulletMarker indent)) lst
+blockToRTF indent alignment (OrderedList attribs lst) =
+ (spaceAtEnd . concat) <$>
+ mapM (\(x,y) -> listItemToRTF alignment indent x y)
+ (zip (orderedMarkers indent attribs) lst)
+blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$>
+ mapM (definitionListItemToRTF alignment indent) lst
+blockToRTF indent _ HorizontalRule = return $
+ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
+blockToRTF indent alignment (Header level _ lst) = do
+ contents <- inlinesToRTF lst
+ return $ rtfPar indent 0 alignment $
+ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ contents
+blockToRTF indent alignment (Table caption aligns sizes headers rows) = do
+ caption' <- inlinesToRTF caption
+ header' <- if all null headers
+ then return ""
+ else tableRowToRTF True indent aligns sizes headers
+ rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows
+ return $ header' ++ rows' ++ rtfPar indent 0 alignment caption'
+
+tableRowToRTF :: PandocMonad m
+ => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String
+tableRowToRTF header indent aligns sizes' cols = do
+ let totalTwips = 6 * 1440 -- 6 inches
+ let sizes = if all (== 0) sizes'
+ then take (length cols) $ repeat (1.0 / fromIntegral (length cols))
+ else sizes'
+ columns <- concat <$> mapM (\(x,y) -> tableItemToRTF indent x y)
+ (zip aligns cols)
+ let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
+ (0 :: Integer) sizes
+ let cellDefs = map (\edge -> (if header
+ then "\\clbrdrb\\brdrs"
+ else "") ++ "\\cellx" ++ show edge)
+ rightEdges
+ let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
+ "\\trkeep\\intbl\n{\n"
+ let end = "}\n\\intbl\\row}\n"
+ return $ start ++ columns ++ end
+
+tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String
+tableItemToRTF indent alignment item = do
+ contents <- blocksToRTF indent alignment item
+ return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n"
+
+-- | Ensure that there's the same amount of space after compact
+-- lists as after regular lists.
+spaceAtEnd :: String -> String
+spaceAtEnd str =
+ if isSuffixOf "\\par}\n" str
+ then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ else str
+
+-- | Convert list item (list of blocks) to RTF.
+listItemToRTF :: PandocMonad m
+ => Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> String -- ^ list start marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> m String
+listItemToRTF alignment indent marker [] = return $
+ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF alignment indent marker list = do
+ (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list
+ let listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++
+ "\\tx" ++ show listIncrement ++ "\\tab"
+ let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
+ listMarker ++ dropWhile isDigit xs
+ insertListMarker ('\\':'f':'i':d:xs) | isDigit d =
+ listMarker ++ dropWhile isDigit xs
+ insertListMarker (x:xs) =
+ x : insertListMarker xs
+ insertListMarker [] = []
+ -- insert the list marker into the (processed) first block
+ return $ insertListMarker first ++ concat rest
+
+-- | Convert definition list item (label, list of blocks) to RTF.
+definitionListItemToRTF :: PandocMonad m
+ => Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> ([Inline],[[Block]]) -- ^ list item (list of blocks)
+ -> m String
+definitionListItemToRTF alignment indent (label, defs) = do
+ labelText <- blockToRTF indent alignment (Plain label)
+ itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs)
+ return $ labelText ++ itemsText
+
+-- | Convert list of inline items to RTF.
+inlinesToRTF :: PandocMonad m
+ => [Inline] -- ^ list of inlines to convert
+ -> m String
+inlinesToRTF lst = concat <$> mapM inlineToRTF lst
+
+-- | Convert inline item to RTF.
+inlineToRTF :: PandocMonad m
+ => Inline -- ^ inline to convert
+ -> m String
+inlineToRTF (Span _ lst) = inlinesToRTF lst
+inlineToRTF (Emph lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\i " ++ contents ++ "}"
+inlineToRTF (Strong lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\b " ++ contents ++ "}"
+inlineToRTF (Strikeout lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\strike " ++ contents ++ "}"
+inlineToRTF (Superscript lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\super " ++ contents ++ "}"
+inlineToRTF (Subscript lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\sub " ++ contents ++ "}"
+inlineToRTF (SmallCaps lst) = do
+ contents <- inlinesToRTF lst
+ return $ "{\\scaps " ++ contents ++ "}"
+inlineToRTF (Quoted SingleQuote lst) = do
+ contents <- inlinesToRTF lst
+ return $ "\\u8216'" ++ contents ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) = do
+ contents <- inlinesToRTF lst
+ return $ "\\u8220\"" ++ contents ++ "\\u8221\""
+inlineToRTF (Code _ str) = return $ "{\\f1 " ++ (codeStringToRTF str) ++ "}"
+inlineToRTF (Str str) = return $ stringToRTF str
+inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF
+inlineToRTF (Cite _ lst) = inlinesToRTF lst
+inlineToRTF il@(RawInline f str)
+ | f == Format "rtf" = return str
+ | otherwise = do
+ return $ InlineNotRendered il
+ return ""
+inlineToRTF (LineBreak) = return "\\line "
+inlineToRTF SoftBreak = return " "
+inlineToRTF Space = return " "
+inlineToRTF (Link _ text (src, _)) = do
+ contents <- inlinesToRTF text
+ return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
+ "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n"
+inlineToRTF (Image _ _ (source, _)) =
+ return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Note contents) = do
+ body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents
+ return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ body ++ "}"
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
new file mode 100644
index 000000000..89a826269
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -0,0 +1,183 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Shared
+ Copyright : Copyright (C) 2013-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Shared utility functions for pandoc writers.
+-}
+module Text.Pandoc.Writers.Shared (
+ metaToJSON
+ , getField
+ , setField
+ , defField
+ , tagWithAttrs
+ , fixDisplayMath
+ , unsmartify
+ )
+where
+import Text.Pandoc.Definition
+import Text.Pandoc.Pretty
+import Text.Pandoc.Options
+import Text.Pandoc.XML (escapeStringForXML)
+import Control.Monad (liftM)
+import qualified Data.HashMap.Strict as H
+import qualified Data.Map as M
+import qualified Data.Text as T
+import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode)
+import Text.Pandoc.UTF8 (toStringLazy)
+import qualified Data.Traversable as Traversable
+import Data.List ( groupBy )
+import Data.Maybe ( isJust )
+
+-- | Create JSON value for template from a 'Meta' and an association list
+-- of variables, specified at the command line or in the writer.
+-- Variables overwrite metadata fields with the same names.
+-- If multiple variables are set with the same name, a list is
+-- assigned.
+metaToJSON :: Monad m
+ => WriterOptions
+ -> ([Block] -> m String)
+ -> ([Inline] -> m String)
+ -> Meta
+ -> m Value
+metaToJSON opts blockWriter inlineWriter (Meta metamap)
+ | isJust (writerTemplate opts) = do
+ let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty)
+ $ writerVariables opts
+ renderedMap <- Traversable.mapM
+ (metaValueToJSON blockWriter inlineWriter)
+ metamap
+ let metadata = M.foldWithKey defField baseContext renderedMap
+ return $ defField "meta-json" (toStringLazy $ encode metadata) metadata
+ | otherwise = return (Object H.empty)
+
+metaValueToJSON :: Monad m
+ => ([Block] -> m String)
+ -> ([Inline] -> m String)
+ -> MetaValue
+ -> m Value
+metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $
+ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap
+metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $
+ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs
+metaValueToJSON _ _ (MetaBool b) = return $ toJSON b
+metaValueToJSON _ _ (MetaString s) = return $ toJSON s
+metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs
+metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs
+
+-- | Retrieve a field value from a JSON object.
+getField :: FromJSON a
+ => String
+ -> Value
+ -> Maybe a
+getField field (Object hashmap) = do
+ result <- H.lookup (T.pack field) hashmap
+ case fromJSON result of
+ Success x -> return x
+ _ -> fail "Could not convert from JSON"
+getField _ _ = fail "Not a JSON object"
+
+setField :: ToJSON a
+ => String
+ -> a
+ -> Value
+ -> Value
+-- | Set a field of a JSON object. If the field already has a value,
+-- convert it into a list with the new value appended to the old value(s).
+-- This is a utility function to be used in preparing template contexts.
+setField field val (Object hashmap) =
+ Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap
+ where combine newval oldval =
+ case fromJSON oldval of
+ Success xs -> toJSON $ xs ++ [newval]
+ _ -> toJSON [oldval, newval]
+setField _ _ x = x
+
+defField :: ToJSON a
+ => String
+ -> a
+ -> Value
+ -> Value
+-- | Set a field of a JSON object if it currently has no value.
+-- If it has a value, do nothing.
+-- This is a utility function to be used in preparing template contexts.
+defField field val (Object hashmap) =
+ Object $ H.insertWith f (T.pack field) (toJSON val) hashmap
+ where f _newval oldval = oldval
+defField _ _ x = x
+
+-- Produce an HTML tag with the given pandoc attributes.
+tagWithAttrs :: String -> Attr -> Doc
+tagWithAttrs tag (ident,classes,kvs) = hsep
+ ["<" <> text tag
+ ,if null ident
+ then empty
+ else "id=" <> doubleQuotes (text ident)
+ ,if null classes
+ then empty
+ else "class=" <> doubleQuotes (text (unwords classes))
+ ,hsep (map (\(k,v) -> text k <> "=" <>
+ doubleQuotes (text (escapeStringForXML v))) kvs)
+ ] <> ">"
+
+isDisplayMath :: Inline -> Bool
+isDisplayMath (Math DisplayMath _) = True
+isDisplayMath _ = False
+
+stripLeadingTrailingSpace :: [Inline] -> [Inline]
+stripLeadingTrailingSpace = go . reverse . go . reverse
+ where go (Space:xs) = xs
+ go (SoftBreak:xs) = xs
+ go xs = xs
+
+-- Put display math in its own block (for ODT/DOCX).
+fixDisplayMath :: Block -> Block
+fixDisplayMath (Plain lst)
+ | any isDisplayMath lst && not (all isDisplayMath lst) =
+ -- chop into several paragraphs so each displaymath is its own
+ Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
+ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+ not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath (Para lst)
+ | any isDisplayMath lst && not (all isDisplayMath lst) =
+ -- chop into several paragraphs so each displaymath is its own
+ Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
+ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+ not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath x = x
+
+unsmartify :: WriterOptions -> String -> String
+unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs
+unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs
+unsmartify opts ('\8211':xs)
+ | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs
+ | otherwise = "--" ++ unsmartify opts xs
+unsmartify opts ('\8212':xs)
+ | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs
+ | otherwise = "---" ++ unsmartify opts xs
+unsmartify opts (x:xs) = x : unsmartify opts xs
+unsmartify _ [] = []
+
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
new file mode 100644
index 000000000..a54d42c53
--- /dev/null
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -0,0 +1,324 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.TEI (writeTEI) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Data.List ( stripPrefix, isPrefixOf )
+import Data.Char ( toLower )
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class ( PandocMonad )
+
+-- | Convert list of authors to a docbook <author> section
+authorToTEI :: WriterOptions -> [Inline] -> B.Inlines
+authorToTEI opts name' =
+ let name = render Nothing $ inlinesToTEI opts name'
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ in B.rawInline "tei" $ render colwidth $
+ inTagsSimple "author" (text $ escapeStringForXML name)
+
+-- | Convert Pandoc document to string in Docbook format.
+writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeTEI opts (Pandoc meta blocks) = return $
+ let elements = hierarchicalize blocks
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ startLvl = case writerTopLevelDivision opts of
+ TopLevelPart -> -1
+ TopLevelChapter -> 0
+ TopLevelSection -> 1
+ TopLevelDefault -> 1
+ auths' = map (authorToTEI opts) $ docAuthors meta
+ meta' = B.setMeta "author" auths' meta
+ Just metadata = metaToJSON opts
+ (Just . render colwidth . (vcat .
+ (map (elementToTEI opts startLvl)) . hierarchicalize))
+ (Just . render colwidth . inlinesToTEI opts)
+ meta'
+ main = render' $ vcat (map (elementToTEI opts startLvl) elements)
+ context = defField "body" main
+ $ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML -> True
+ _ -> False)
+ $ metadata
+ in case writerTemplate opts of
+ Nothing -> main
+ Just tpl -> renderTemplate' tpl context
+
+-- | Convert an Element to TEI.
+elementToTEI :: WriterOptions -> Int -> Element -> Doc
+elementToTEI opts _ (Blk block) = blockToTEI opts block
+elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) =
+ -- TEI doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ -- level numbering correspond to LaTeX internals
+ divType = case lvl of
+ n | n == -1 -> "part"
+ | n == 0 -> "chapter"
+ | n >= 1 && n <= 5 -> "level" ++ show n
+ | otherwise -> "section"
+ in inTags True "div" [("type", divType) | not (null id')] $
+-- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $
+ inTagsSimple "head" (inlinesToTEI opts title) $$
+ vcat (map (elementToTEI opts (lvl + 1)) elements')
+
+-- | Convert a list of Pandoc blocks to TEI.
+blocksToTEI :: WriterOptions -> [Block] -> Doc
+blocksToTEI opts = vcat . map (blockToTEI opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a TEI
+-- list with labels and items.
+deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc
+deflistItemsToTEI opts items =
+ vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items
+
+-- | Convert a term and a list of blocks into a TEI varlistentry.
+deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc
+deflistItemToTEI opts term defs =
+ let def' = concatMap (map plainToPara) defs
+ in inTagsIndented "label" (inlinesToTEI opts term) $$
+ inTagsIndented "item" (blocksToTEI opts def')
+
+-- | Convert a list of lists of blocks to a list of TEI list items.
+listItemsToTEI :: WriterOptions -> [[Block]] -> Doc
+listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items
+
+-- | Convert a list of blocks into a TEI list item.
+listItemToTEI :: WriterOptions -> [Block] -> Doc
+listItemToTEI opts item =
+ inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item
+
+imageToTEI :: WriterOptions -> Attr -> String -> Doc
+imageToTEI _ attr src = selfClosingTag "graphic" $
+ ("url", src) : idAndRole attr ++ dims
+ where
+ dims = go Width "width" ++ go Height "depth"
+ go dir dstr = case (dimension dir attr) of
+ Just a -> [(dstr, show a)]
+ Nothing -> []
+
+-- | Convert a Pandoc block element to TEI.
+blockToTEI :: WriterOptions -> Block -> Doc
+blockToTEI _ Null = empty
+-- Add ids to paragraphs in divs with ids - this is needed for
+-- pandoc-citeproc to get link anchors in bibliographies:
+blockToTEI opts (Div (ident,_,_) [Para lst]) =
+ let attribs = [("id", ident) | not (null ident)] in
+ inTags False "p" attribs $ inlinesToTEI opts lst
+blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
+blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize
+-- For TEI simple, text must be within containing block element, so
+-- we use plainToPara to ensure that Plain text ends up contained by
+-- something.
+blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
+-- title beginning with fig: indicates that the image is a figure
+--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
+-- let alt = inlinesToTEI opts txt
+-- capt = if null txt
+-- then empty
+-- else inTagsSimple "title" alt
+-- in inTagsIndented "figure" $
+-- capt $$
+-- (inTagsIndented "mediaobject" $
+-- (inTagsIndented "imageobject"
+-- (imageToTEI opts attr src)) $$
+-- inTagsSimple "textobject" (inTagsSimple "phrase" alt))
+blockToTEI opts (Para lst) =
+ inTags False "p" [] $ inlinesToTEI opts lst
+blockToTEI opts (LineBlock lns) =
+ blockToTEI opts $ linesToPara lns
+blockToTEI opts (BlockQuote blocks) =
+ inTagsIndented "quote" $ blocksToTEI opts blocks
+blockToTEI _ (CodeBlock (_,classes,_) str) =
+ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</ab>")
+ where lang = if null langs
+ then ""
+ else escapeStringForXML (head langs)
+ isLang l = map toLower l `elem` map (map toLower) languages
+ langsFrom s = if isLang s
+ then [s]
+ else languagesByExtension . map toLower $ s
+ langs = concatMap langsFrom classes
+blockToTEI opts (BulletList lst) =
+ let attribs = [("type", "unordered")]
+ in inTags True "list" attribs $ listItemsToTEI opts lst
+blockToTEI _ (OrderedList _ []) = empty
+blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) =
+ let attribs = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("type", "ordered:arabic")]
+ Example -> [("type", "ordered:arabic")]
+ UpperAlpha -> [("type", "ordered:upperalpha")]
+ LowerAlpha -> [("type", "ordered:loweralpha")]
+ UpperRoman -> [("type", "ordered:upperroman")]
+ LowerRoman -> [("type", "ordered:lowerroman")]
+ items = if start == 1
+ then listItemsToTEI opts (first:rest)
+ else (inTags True "item" [("n",show start)]
+ (blocksToTEI opts $ map plainToPara first)) $$
+ listItemsToTEI opts rest
+ in inTags True "list" attribs items
+blockToTEI opts (DefinitionList lst) =
+ let attribs = [("type", "definition")]
+ in inTags True "list" attribs $ deflistItemsToTEI opts lst
+blockToTEI _ (RawBlock f str)
+ | f == "tei" = text str -- raw TEI block (should such a thing exist).
+-- | f == "html" = text str -- allow html for backwards compatibility
+ | otherwise = empty
+blockToTEI _ HorizontalRule =
+ selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")]
+
+-- | TEI Tables
+-- TEI Simple's tables are composed of cells and rows; other
+-- table info in the AST is here lossily discard.
+blockToTEI opts (Table _ _ _ headers rows) =
+ let
+ headers' = tableHeadersToTEI opts headers
+-- headers' = if all null headers
+-- then return empty
+-- else tableRowToTEI opts headers
+ in
+ inTags True "table" [] $
+ vcat $ [headers'] <> map (tableRowToTEI opts) rows
+
+tableRowToTEI :: WriterOptions
+ -> [[Block]]
+ -> Doc
+tableRowToTEI opts cols =
+ inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols
+
+tableHeadersToTEI :: WriterOptions
+ -> [[Block]]
+ -> Doc
+tableHeadersToTEI opts cols =
+ inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols
+
+tableItemToTEI :: WriterOptions
+ -> [Block]
+ -> Doc
+tableItemToTEI opts item =
+ inTags False "cell" [] $ vcat $ map (blockToTEI opts) item
+
+-- | Convert a list of inline elements to TEI.
+inlinesToTEI :: WriterOptions -> [Inline] -> Doc
+inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst
+
+-- | Convert an inline element to TEI.
+inlineToTEI :: WriterOptions -> Inline -> Doc
+inlineToTEI _ (Str str) = text $ escapeStringForXML str
+inlineToTEI opts (Emph lst) =
+ inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst
+inlineToTEI opts (Strong lst) =
+ inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst
+inlineToTEI opts (Strikeout lst) =
+ inTags False "hi" [("rendition", "simple:strikethrough")] $
+ inlinesToTEI opts lst
+inlineToTEI opts (Superscript lst) =
+ inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst
+inlineToTEI opts (Subscript lst) =
+ inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst
+inlineToTEI opts (SmallCaps lst) =
+ inTags False "hi" [("rendition", "simple:smallcaps")] $
+ inlinesToTEI opts lst
+inlineToTEI opts (Quoted _ lst) =
+ inTagsSimple "quote" $ inlinesToTEI opts lst
+inlineToTEI opts (Cite _ lst) =
+ inlinesToTEI opts lst
+inlineToTEI opts (Span _ ils) =
+ inlinesToTEI opts ils
+inlineToTEI _ (Code _ str) =
+ inTags False "seg" [("type","code")] $ text (escapeStringForXML str)
+-- Distinguish display from inline math by wrapping the former in a "figure."
+inlineToTEI _ (Math t str) =
+ case t of
+ InlineMath -> inTags False "formula" [("notation","TeX")] $
+ text (str)
+ DisplayMath -> inTags True "figure" [("type","math")] $
+ inTags False "formula" [("notation","TeX")] $ text (str)
+
+inlineToTEI _ (RawInline f x) | f == "tei" = text x
+ | otherwise = empty
+inlineToTEI _ LineBreak = selfClosingTag "lb" []
+inlineToTEI _ Space = space
+-- because we use \n for LineBreak, we can't do soft breaks:
+inlineToTEI _ SoftBreak = space
+inlineToTEI opts (Link attr txt (src, _))
+ | Just email <- stripPrefix "mailto:" src =
+ let emailLink = text $
+ escapeStringForXML $ email
+ in case txt of
+ [Str s] | escapeURI s == email -> emailLink
+ _ -> inlinesToTEI opts txt <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise =
+ (if isPrefixOf "#" src
+ then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr
+ else inTags False "ref" $ ("target", src) : idAndRole attr ) $
+ inlinesToTEI opts txt
+inlineToTEI opts (Image attr description (src, tit)) =
+ let titleDoc = if null tit
+ then empty
+ else inTags False "figDesc" [] (text $ escapeStringForXML tit)
+ imageDesc = if null description
+ then empty
+ else inTags False "head" [] (inlinesToTEI opts description)
+ in inTagsIndented "figure" $ imageDesc $$
+ imageToTEI opts attr src $$ titleDoc
+inlineToTEI opts (Note contents) =
+ inTagsIndented "note" $ blocksToTEI opts contents
+
+idAndRole :: Attr -> [(String, String)]
+idAndRole (id',cls,_) = ident ++ role
+ where
+ ident = if null id'
+ then []
+ else [("id", id')]
+ role = if null cls
+ then []
+ else [("role", unwords cls)]
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
new file mode 100644
index 000000000..fe6024351
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -0,0 +1,498 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2008-2015 John MacFarlane and Peter Wang
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Texinfo
+ Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into Texinfo.
+-}
+module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Printf ( printf )
+import Data.List ( transpose, maximumBy )
+import Data.Ord ( comparing )
+import Data.Char ( chr, ord )
+import Control.Monad.State
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import Network.URI ( isURI, unEscapeString )
+import System.FilePath
+import qualified Data.Set as Set
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+
+data WriterState =
+ WriterState { stStrikeout :: Bool -- document contains strikeout
+ , stSuperscript :: Bool -- document contains superscript
+ , stSubscript :: Bool -- document contains subscript
+ , stEscapeComma :: Bool -- in a context where we need @comma
+ , stIdentifiers :: Set.Set String -- header ids used already
+ , stOptions :: WriterOptions -- writer options
+ }
+
+{- TODO:
+ - internal cross references a la HTML
+ - generated .texi files don't work when run through texi2dvi
+ -}
+
+type TI m = StateT WriterState m
+
+-- | Convert Pandoc to Texinfo.
+writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeTexinfo options document =
+ evalStateT (pandocToTexinfo options $ wrapTop document) $
+ WriterState { stStrikeout = False, stSuperscript = False,
+ stEscapeComma = False, stSubscript = False,
+ stIdentifiers = Set.empty, stOptions = options}
+
+-- | Add a "Top" node around the document, needed by Texinfo.
+wrapTop :: Pandoc -> Pandoc
+wrapTop (Pandoc meta blocks) =
+ Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks)
+
+pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m String
+pandocToTexinfo options (Pandoc meta blocks) = do
+ let titlePage = not $ all null
+ $ docTitle meta : docDate meta : docAuthors meta
+ let colwidth = if writerWrapText options == WrapAuto
+ then Just $ writerColumns options
+ else Nothing
+ metadata <- metaToJSON options
+ (fmap (render colwidth) . blockListToTexinfo)
+ (fmap (render colwidth) . inlineListToTexinfo)
+ meta
+ main <- blockListToTexinfo blocks
+ st <- get
+ let body = render colwidth main
+ let context = defField "body" body
+ $ defField "toc" (writerTableOfContents options)
+ $ defField "titlepage" titlePage
+ $ defField "subscript" (stSubscript st)
+ $ defField "superscript" (stSuperscript st)
+ $ defField "strikeout" (stStrikeout st)
+ $ metadata
+ case writerTemplate options of
+ Nothing -> return body
+ Just tpl -> return $ renderTemplate' tpl context
+
+-- | Escape things as needed for Texinfo.
+stringToTexinfo :: String -> String
+stringToTexinfo = escapeStringUsing texinfoEscapes
+ where texinfoEscapes = [ ('{', "@{")
+ , ('}', "@}")
+ , ('@', "@@")
+ , ('\160', "@ ")
+ , ('\x2014', "---")
+ , ('\x2013', "--")
+ , ('\x2026', "@dots{}")
+ , ('\x2019', "'")
+ ]
+
+escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc
+escapeCommas parser = do
+ oldEscapeComma <- gets stEscapeComma
+ modify $ \st -> st{ stEscapeComma = True }
+ res <- parser
+ modify $ \st -> st{ stEscapeComma = oldEscapeComma }
+ return res
+
+-- | Puts contents into Texinfo command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '@' <> text cmd <> braces contents
+
+-- | Convert Pandoc block element to Texinfo.
+blockToTexinfo :: PandocMonad m
+ => Block -- ^ Block to convert
+ -> TI m Doc
+
+blockToTexinfo Null = return empty
+
+blockToTexinfo (Div _ bs) = blockListToTexinfo bs
+
+blockToTexinfo (Plain lst) =
+ inlineListToTexinfo lst
+
+-- title beginning with fig: indicates that the image is a figure
+blockToTexinfo (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return empty
+ else (\c -> text "@caption" <> braces c) `fmap`
+ inlineListToTexinfo txt
+ img <- inlineToTexinfo (Image attr txt (src,tit))
+ return $ text "@float" $$ img $$ capt $$ text "@end float"
+
+blockToTexinfo (Para lst) =
+ inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo
+
+blockToTexinfo (LineBlock lns) =
+ blockToTexinfo $ linesToPara lns
+
+blockToTexinfo (BlockQuote lst) = do
+ contents <- blockListToTexinfo lst
+ return $ text "@quotation" $$
+ contents $$
+ text "@end quotation"
+
+blockToTexinfo (CodeBlock _ str) = do
+ return $ blankline $$
+ text "@verbatim" $$
+ flush (text str) $$
+ text "@end verbatim" <> blankline
+
+blockToTexinfo b@(RawBlock f str)
+ | f == "texinfo" = return $ text str
+ | f == "latex" || f == "tex" =
+ return $ text "@tex" $$ text str $$ text "@end tex"
+ | otherwise = do
+ report $ BlockNotRendered b
+ return empty
+
+blockToTexinfo (BulletList lst) = do
+ items <- mapM listItemToTexinfo lst
+ return $ text "@itemize" $$
+ vcat items $$
+ text "@end itemize" <> blankline
+
+blockToTexinfo (OrderedList (start, numstyle, _) lst) = do
+ items <- mapM listItemToTexinfo lst
+ return $ text "@enumerate " <> exemplar $$
+ vcat items $$
+ text "@end enumerate" <> blankline
+ where
+ exemplar = case numstyle of
+ DefaultStyle -> decimal
+ Decimal -> decimal
+ Example -> decimal
+ UpperRoman -> decimal -- Roman numerals not supported
+ LowerRoman -> decimal
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ decimal = if start == 1
+ then empty
+ else text (show start)
+ upperAlpha = text [chr $ ord 'A' + start - 1]
+ lowerAlpha = text [chr $ ord 'a' + start - 1]
+
+blockToTexinfo (DefinitionList lst) = do
+ items <- mapM defListItemToTexinfo lst
+ return $ text "@table @asis" $$
+ vcat items $$
+ text "@end table" <> blankline
+
+blockToTexinfo HorizontalRule =
+ -- XXX can't get the equivalent from LaTeX.hs to work
+ return $ text "@iftex" $$
+ text "@bigskip@hrule@bigskip" $$
+ text "@end iftex" $$
+ text "@ifnottex" $$
+ text (take 72 $ repeat '-') $$
+ text "@end ifnottex"
+
+blockToTexinfo (Header 0 _ lst) = do
+ txt <- if null lst
+ then return $ text "Top"
+ else inlineListToTexinfo lst
+ return $ text "@node Top" $$
+ text "@top " <> txt <> blankline
+
+blockToTexinfo (Header level _ lst)
+ | level < 1 || level > 4 = blockToTexinfo (Para lst)
+ | otherwise = do
+ node <- inlineListForNode lst
+ txt <- inlineListToTexinfo lst
+ idsUsed <- gets stIdentifiers
+ let id' = uniqueIdent lst idsUsed
+ modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
+ sec <- seccmd level
+ return $ if (level > 0) && (level <= 4)
+ then blankline <> text "@node " <> node $$
+ text sec <> txt $$
+ text "@anchor" <> braces (text $ '#':id')
+ else txt
+ where
+ seccmd :: PandocMonad m => Int -> TI m String
+ seccmd 1 = return "@chapter "
+ seccmd 2 = return "@section "
+ seccmd 3 = return "@subsection "
+ seccmd 4 = return "@subsubsection "
+ seccmd _ = throwError $ PandocSomeError "illegal seccmd level"
+
+blockToTexinfo (Table caption aligns widths heads rows) = do
+ headers <- if all null heads
+ then return empty
+ else tableHeadToTexinfo aligns heads
+ captionText <- inlineListToTexinfo caption
+ rowsText <- mapM (tableRowToTexinfo aligns) rows
+ colDescriptors <-
+ if all (== 0) widths
+ then do -- use longest entry instead of column widths
+ cols <- mapM (mapM (liftM (render Nothing . hcat) . mapM blockToTexinfo)) $
+ transpose $ heads : rows
+ return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols
+ else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
+ let tableBody = text ("@multitable " ++ colDescriptors) $$
+ headers $$
+ vcat rowsText $$
+ text "@end multitable"
+ return $ if isEmpty captionText
+ then tableBody <> blankline
+ else text "@float" $$
+ tableBody $$
+ inCmd "caption" captionText $$
+ text "@end float"
+
+tableHeadToTexinfo :: PandocMonad m
+ => [Alignment]
+ -> [[Block]]
+ -> TI m Doc
+tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem "
+
+tableRowToTexinfo :: PandocMonad m
+ => [Alignment]
+ -> [[Block]]
+ -> TI m Doc
+tableRowToTexinfo = tableAnyRowToTexinfo "@item "
+
+tableAnyRowToTexinfo :: PandocMonad m
+ => String
+ -> [Alignment]
+ -> [[Block]]
+ -> TI m Doc
+tableAnyRowToTexinfo itemtype aligns cols =
+ zipWithM alignedBlock aligns cols >>=
+ return . (text itemtype $$) . foldl (\row item -> row $$
+ (if isEmpty row then empty else text " @tab ") <> item) empty
+
+alignedBlock :: PandocMonad m
+ => Alignment
+ -> [Block]
+ -> TI m Doc
+-- XXX @flushleft and @flushright text won't get word wrapped. Since word
+-- wrapping is more important than alignment, we ignore the alignment.
+alignedBlock _ = blockListToTexinfo
+{-
+alignedBlock AlignLeft col = do
+ b <- blockListToTexinfo col
+ return $ text "@flushleft" $$ b $$ text "@end flushleft"
+alignedBlock AlignRight col = do
+ b <- blockListToTexinfo col
+ return $ text "@flushright" $$ b $$ text "@end flushright"
+alignedBlock _ col = blockListToTexinfo col
+-}
+
+-- | Convert Pandoc block elements to Texinfo.
+blockListToTexinfo :: PandocMonad m
+ => [Block]
+ -> TI m Doc
+blockListToTexinfo [] = return empty
+blockListToTexinfo (x:xs) = do
+ x' <- blockToTexinfo x
+ case x of
+ Header level _ _ -> do
+ -- We need need to insert a menu for this node.
+ let (before, after) = break isHeaderBlock xs
+ before' <- blockListToTexinfo before
+ let menu = if level < 4
+ then collectNodes (level + 1) after
+ else []
+ lines' <- mapM makeMenuLine menu
+ let menu' = if null lines'
+ then empty
+ else text "@menu" $$
+ vcat lines' $$
+ text "@end menu"
+ after' <- blockListToTexinfo after
+ return $ x' $$ before' $$ menu' $$ after'
+ Para _ -> do
+ xs' <- blockListToTexinfo xs
+ case xs of
+ ((CodeBlock _ _):_) -> return $ x' $$ xs'
+ _ -> return $ x' $+$ xs'
+ _ -> do
+ xs' <- blockListToTexinfo xs
+ return $ x' $$ xs'
+
+collectNodes :: Int -> [Block] -> [Block]
+collectNodes _ [] = []
+collectNodes level (x:xs) =
+ case x of
+ (Header hl _ _) ->
+ if hl < level
+ then []
+ else if hl == level
+ then x : collectNodes level xs
+ else collectNodes level xs
+ _ ->
+ collectNodes level xs
+
+makeMenuLine :: PandocMonad m
+ => Block
+ -> TI m Doc
+makeMenuLine (Header _ _ lst) = do
+ txt <- inlineListForNode lst
+ return $ text "* " <> txt <> text "::"
+makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block"
+
+listItemToTexinfo :: PandocMonad m
+ => [Block]
+ -> TI m Doc
+listItemToTexinfo lst = do
+ contents <- blockListToTexinfo lst
+ let spacer = case reverse lst of
+ (Para{}:_) -> blankline
+ _ -> empty
+ return $ text "@item" $$ contents <> spacer
+
+defListItemToTexinfo :: PandocMonad m
+ => ([Inline], [[Block]])
+ -> TI m Doc
+defListItemToTexinfo (term, defs) = do
+ term' <- inlineListToTexinfo term
+ let defToTexinfo bs = do d <- blockListToTexinfo bs
+ case reverse bs of
+ (Para{}:_) -> return $ d <> blankline
+ _ -> return d
+ defs' <- mapM defToTexinfo defs
+ return $ text "@item " <> term' $+$ vcat defs'
+
+-- | Convert list of inline elements to Texinfo.
+inlineListToTexinfo :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> TI m Doc
+inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
+
+-- | Convert list of inline elements to Texinfo acceptable for a node name.
+inlineListForNode :: PandocMonad m
+ => [Inline] -- ^ Inlines to convert
+ -> TI m Doc
+inlineListForNode = return . text . stringToTexinfo .
+ filter (not . disallowedInNode) . stringify
+
+-- periods, commas, colons, and parentheses are disallowed in node names
+disallowedInNode :: Char -> Bool
+disallowedInNode c = c `elem` (".,:()" :: String)
+
+-- | Convert inline element to Texinfo
+inlineToTexinfo :: PandocMonad m
+ => Inline -- ^ Inline to convert
+ -> TI m Doc
+
+inlineToTexinfo (Span _ lst) =
+ inlineListToTexinfo lst
+
+inlineToTexinfo (Emph lst) =
+ inlineListToTexinfo lst >>= return . inCmd "emph"
+
+inlineToTexinfo (Strong lst) =
+ inlineListToTexinfo lst >>= return . inCmd "strong"
+
+inlineToTexinfo (Strikeout lst) = do
+ modify $ \st -> st{ stStrikeout = True }
+ contents <- inlineListToTexinfo lst
+ return $ text "@textstrikeout{" <> contents <> text "}"
+
+inlineToTexinfo (Superscript lst) = do
+ modify $ \st -> st{ stSuperscript = True }
+ contents <- inlineListToTexinfo lst
+ return $ text "@textsuperscript{" <> contents <> char '}'
+
+inlineToTexinfo (Subscript lst) = do
+ modify $ \st -> st{ stSubscript = True }
+ contents <- inlineListToTexinfo lst
+ return $ text "@textsubscript{" <> contents <> char '}'
+
+inlineToTexinfo (SmallCaps lst) =
+ inlineListToTexinfo lst >>= return . inCmd "sc"
+
+inlineToTexinfo (Code _ str) = do
+ return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
+
+inlineToTexinfo (Quoted SingleQuote lst) = do
+ contents <- inlineListToTexinfo lst
+ return $ char '`' <> contents <> char '\''
+
+inlineToTexinfo (Quoted DoubleQuote lst) = do
+ contents <- inlineListToTexinfo lst
+ return $ text "``" <> contents <> text "''"
+
+inlineToTexinfo (Cite _ lst) =
+ inlineListToTexinfo lst
+inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
+inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
+inlineToTexinfo il@(RawInline f str)
+ | f == "latex" || f == "tex" =
+ return $ text "@tex" $$ text str $$ text "@end tex"
+ | f == "texinfo" = return $ text str
+ | otherwise = do
+ report $ InlineNotRendered il
+ return empty
+inlineToTexinfo (LineBreak) = return $ text "@*" <> cr
+inlineToTexinfo SoftBreak = do
+ wrapText <- gets (writerWrapText . stOptions)
+ case wrapText of
+ WrapAuto -> return space
+ WrapNone -> return space
+ WrapPreserve -> return cr
+inlineToTexinfo Space = return space
+
+inlineToTexinfo (Link _ txt (src@('#':_), _)) = do
+ contents <- escapeCommas $ inlineListToTexinfo txt
+ return $ text "@ref" <>
+ braces (text (stringToTexinfo src) <> text "," <> contents)
+inlineToTexinfo (Link _ txt (src, _)) = do
+ case txt of
+ [Str x] | escapeURI x == src -> -- autolink
+ do return $ text $ "@url{" ++ x ++ "}"
+ _ -> do contents <- escapeCommas $ inlineListToTexinfo txt
+ let src1 = stringToTexinfo src
+ return $ text ("@uref{" ++ src1 ++ ",") <> contents <>
+ char '}'
+
+inlineToTexinfo (Image attr alternate (source, _)) = do
+ content <- escapeCommas $ inlineListToTexinfo alternate
+ opts <- gets stOptions
+ let showDim dim = case (dimension dim attr) of
+ (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in"
+ (Just (Percent _)) -> ""
+ (Just d) -> show d
+ Nothing -> ""
+ return $ text ("@image{" ++ base ++ ',':(showDim Width) ++ ',':(showDim Height) ++ ",")
+ <> content <> text "," <> text (ext ++ "}")
+ where
+ ext = drop 1 $ takeExtension source'
+ base = dropExtension source'
+ source' = if isURI source
+ then source
+ else unEscapeString source
+
+inlineToTexinfo (Note contents) = do
+ contents' <- blockListToTexinfo contents
+ return $ text "@footnote" <> braces contents'
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
new file mode 100644
index 000000000..45f1780cf
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -0,0 +1,486 @@
+{-
+Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Textile
+ Copyright : Copyright (C) 2010-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Textile markup.
+
+Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
+-}
+module Text.Pandoc.Writers.Textile ( writeTextile ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (render)
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.XML ( escapeStringForXML )
+import Data.List ( intercalate )
+import Control.Monad.State
+import Data.Char ( isSpace )
+import Text.Pandoc.Class ( PandocMonad )
+
+data WriterState = WriterState {
+ stNotes :: [String] -- Footnotes
+ , stListLevel :: [Char] -- String at beginning of list items, e.g. "**"
+ , stStartNum :: Maybe Int -- Start number if first list item
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+-- | Convert Pandoc to Textile.
+writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeTextile opts document = return $
+ evalState (pandocToTextile opts document)
+ WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing,
+ stUseTags = False }
+
+-- | Return Textile representation of document.
+pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
+pandocToTextile opts (Pandoc meta blocks) = do
+ metadata <- metaToJSON opts (blockListToTextile opts)
+ (inlineListToTextile opts) meta
+ body <- blockListToTextile opts blocks
+ notes <- liftM (unlines . reverse . stNotes) get
+ let main = body ++ if null notes then "" else ("\n\n" ++ notes)
+ let context = defField "body" main metadata
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> return $ renderTemplate' tpl context
+
+withUseTags :: State WriterState a -> State WriterState a
+withUseTags action = do
+ oldUseTags <- liftM stUseTags get
+ modify $ \s -> s { stUseTags = True }
+ result <- action
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return result
+
+-- | Escape one character as needed for Textile.
+escapeCharForTextile :: Char -> String
+escapeCharForTextile x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '*' -> "&#42;"
+ '_' -> "&#95;"
+ '@' -> "&#64;"
+ '+' -> "&#43;"
+ '-' -> "&#45;"
+ '|' -> "&#124;"
+ '\x2014' -> " -- "
+ '\x2013' -> " - "
+ '\x2019' -> "'"
+ '\x2026' -> "..."
+ c -> [c]
+
+-- | Escape string as needed for Textile.
+escapeStringForTextile :: String -> String
+escapeStringForTextile = concatMap escapeCharForTextile
+
+-- | Convert Pandoc block element to Textile.
+blockToTextile :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState String
+
+blockToTextile _ Null = return ""
+
+blockToTextile opts (Div attr bs) = do
+ let startTag = render Nothing $ tagWithAttrs "div" attr
+ let endTag = "</div>"
+ contents <- blockListToTextile opts bs
+ return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n"
+
+blockToTextile opts (Plain inlines) =
+ inlineListToTextile opts inlines
+
+-- title beginning with fig: indicates that the image is a figure
+blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- blockToTextile opts (Para txt)
+ im <- inlineToTextile opts (Image attr txt (src,tit))
+ return $ im ++ "\n" ++ capt
+
+blockToTextile opts (Para inlines) = do
+ useTags <- liftM stUseTags get
+ listLevel <- liftM stListLevel get
+ contents <- inlineListToTextile opts inlines
+ return $ if useTags
+ then "<p>" ++ contents ++ "</p>"
+ else contents ++ if null listLevel then "\n" else ""
+
+blockToTextile opts (LineBlock lns) =
+ blockToTextile opts $ linesToPara lns
+
+blockToTextile _ (RawBlock f str)
+ | f == Format "html" || f == Format "textile" = return str
+ | otherwise = return ""
+
+blockToTextile _ HorizontalRule = return "<hr />\n"
+
+blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do
+ contents <- inlineListToTextile opts inlines
+ let identAttr = if null ident then "" else ('#':ident)
+ let attribs = if null identAttr && null classes
+ then ""
+ else "(" ++ unwords classes ++ identAttr ++ ")"
+ let lang = maybe "" (\x -> "[" ++ x ++ "]") $ lookup "lang" keyvals
+ let styles = maybe "" (\x -> "{" ++ x ++ "}") $ lookup "style" keyvals
+ let prefix = 'h' : show level ++ attribs ++ styles ++ lang ++ ". "
+ return $ prefix ++ contents ++ "\n"
+
+blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) =
+ return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++
+ "\n</pre>\n"
+ where classes' = if null classes
+ then ""
+ else " class=\"" ++ unwords classes ++ "\""
+
+blockToTextile _ (CodeBlock (_,classes,_) str) =
+ return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n"
+ where classes' = if null classes
+ then ""
+ else "(" ++ unwords classes ++ ")"
+
+blockToTextile opts (BlockQuote bs@[Para _]) = do
+ contents <- blockListToTextile opts bs
+ return $ "bq. " ++ contents ++ "\n\n"
+
+blockToTextile opts (BlockQuote blocks) = do
+ contents <- blockListToTextile opts blocks
+ return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n"
+
+blockToTextile opts (Table [] aligns widths headers rows') |
+ all (==0) widths = do
+ hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers
+ let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|"
+ let header = if all null headers then "" else cellsToRow hs ++ "\n"
+ let blocksToCell (align, bs) = do
+ contents <- stripTrailingNewlines <$> blockListToTextile opts bs
+ let alignMarker = case align of
+ AlignLeft -> "<. "
+ AlignRight -> ">. "
+ AlignCenter -> "=. "
+ AlignDefault -> ""
+ return $ alignMarker ++ contents
+ let rowToCells = mapM blocksToCell . zip aligns
+ bs <- mapM rowToCells rows'
+ let body = unlines $ map cellsToRow bs
+ return $ header ++ body
+
+blockToTextile opts (Table capt aligns widths headers rows') = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return ""
+ else do
+ c <- inlineListToTextile opts capt
+ return $ "<caption>" ++ c ++ "</caption>\n"
+ let percent w = show (truncate (100*w) :: Integer) ++ "%"
+ let coltags = if all (== 0.0) widths
+ then ""
+ else unlines $ map
+ (\w -> "<col width=\"" ++ percent w ++ "\" />") widths
+ head' <- if all null headers
+ then return ""
+ else do
+ hs <- tableRowToTextile opts alignStrings 0 headers
+ return $ "<thead>\n" ++ hs ++ "\n</thead>\n"
+ body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows'
+ return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++
+ "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n"
+
+blockToTextile opts x@(BulletList items) = do
+ oldUseTags <- liftM stUseTags get
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- withUseTags $ mapM (listItemToTextile opts) items
+ return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
+ level <- get >>= return . length . stListLevel
+ contents <- mapM (listItemToTextile opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents ++ (if level > 1 then "" else "\n")
+
+blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do
+ oldUseTags <- liftM stUseTags get
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- withUseTags $ mapM (listItemToTextile opts) items
+ return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++
+ "\n</ol>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "#"
+ , stStartNum = if start > 1
+ then Just start
+ else Nothing }
+ level <- get >>= return . length . stListLevel
+ contents <- mapM (listItemToTextile opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s),
+ stStartNum = Nothing }
+ return $ vcat contents ++ (if level > 1 then "" else "\n")
+
+blockToTextile opts (DefinitionList items) = do
+ contents <- withUseTags $ mapM (definitionListItemToTextile opts) items
+ return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n"
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet or ordered list item (list of blocks) to Textile.
+listItemToTextile :: WriterOptions -> [Block] -> State WriterState String
+listItemToTextile opts items = do
+ contents <- blockListToTextile opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<li>" ++ contents ++ "</li>"
+ else do
+ marker <- gets stListLevel
+ mbstart <- gets stStartNum
+ case mbstart of
+ Just n -> do
+ modify $ \s -> s{ stStartNum = Nothing }
+ return $ marker ++ show n ++ " " ++ contents
+ Nothing -> return $ marker ++ " " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to Textile.
+definitionListItemToTextile :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> State WriterState String
+definitionListItemToTextile opts (label, items) = do
+ labelText <- inlineListToTextile opts label
+ contents <- mapM (blockListToTextile opts) items
+ return $ "<dt>" ++ labelText ++ "</dt>\n" ++
+ (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents)
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (_, sty, _) items -> all isSimpleListItem items &&
+ sty `elem` [DefaultStyle, Decimal]
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+-- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki,
+-- and Textile writers, and should be abstracted out.)
+
+tableRowToTextile :: WriterOptions
+ -> [String]
+ -> Int
+ -> [[Block]]
+ -> State WriterState String
+tableRowToTextile opts alignStrings rownum cols' = do
+ let celltype = if rownum == 0 then "th" else "td"
+ let rowclass = case rownum of
+ 0 -> "header"
+ x | x `rem` 2 == 1 -> "odd"
+ _ -> "even"
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToTextile opts celltype alignment item)
+ alignStrings cols'
+ return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableItemToTextile :: WriterOptions
+ -> String
+ -> String
+ -> [Block]
+ -> State WriterState String
+tableItemToTextile opts celltype align' item = do
+ let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++
+ x ++ "</" ++ celltype ++ ">"
+ contents <- blockListToTextile opts item
+ return $ mkcell contents
+
+-- | Convert list of Pandoc block elements to Textile.
+blockListToTextile :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState String
+blockListToTextile opts blocks =
+ mapM (blockToTextile opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to Textile.
+inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToTextile opts lst =
+ mapM (inlineToTextile opts) lst >>= return . concat
+
+-- | Convert Pandoc inline element to Textile.
+inlineToTextile :: WriterOptions -> Inline -> State WriterState String
+
+inlineToTextile opts (Span _ lst) =
+ inlineListToTextile opts lst
+
+inlineToTextile opts (Emph lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '_' `elem` contents
+ then "<em>" ++ contents ++ "</em>"
+ else "_" ++ contents ++ "_"
+
+inlineToTextile opts (Strong lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '*' `elem` contents
+ then "<strong>" ++ contents ++ "</strong>"
+ else "*" ++ contents ++ "*"
+
+inlineToTextile opts (Strikeout lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '-' `elem` contents
+ then "<del>" ++ contents ++ "</del>"
+ else "-" ++ contents ++ "-"
+
+inlineToTextile opts (Superscript lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '^' `elem` contents
+ then "<sup>" ++ contents ++ "</sup>"
+ else "[^" ++ contents ++ "^]"
+
+inlineToTextile opts (Subscript lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '~' `elem` contents
+ then "<sub>" ++ contents ++ "</sub>"
+ else "[~" ++ contents ++ "~]"
+
+inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst
+
+inlineToTextile opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ "'" ++ contents ++ "'"
+
+inlineToTextile opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ "\"" ++ contents ++ "\""
+
+inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst
+
+inlineToTextile _ (Code _ str) =
+ return $ if '@' `elem` str
+ then "<tt>" ++ escapeStringForXML str ++ "</tt>"
+ else "@" ++ str ++ "@"
+
+inlineToTextile _ (Str str) = return $ escapeStringForTextile str
+
+inlineToTextile _ (Math _ str) =
+ return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>"
+
+inlineToTextile opts (RawInline f str)
+ | f == Format "html" || f == Format "textile" = return str
+ | (f == Format "latex" || f == Format "tex") &&
+ isEnabled Ext_raw_tex opts = return str
+ | otherwise = return ""
+
+inlineToTextile _ LineBreak = return "\n"
+
+inlineToTextile _ SoftBreak = return " "
+
+inlineToTextile _ Space = return " "
+
+inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do
+ let classes = if null cls
+ then ""
+ else "(" ++ unwords cls ++ ")"
+ label <- case txt of
+ [Code _ s]
+ | s == src -> return "$"
+ [Str s]
+ | s == src -> return "$"
+ _ -> inlineListToTextile opts txt
+ return $ "\"" ++ classes ++ label ++ "\":" ++ src
+
+inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do
+ alt' <- inlineListToTextile opts alt
+ let txt = if null tit
+ then if null alt'
+ then ""
+ else "(" ++ alt' ++ ")"
+ else "(" ++ tit ++ ")"
+ classes = if null cls
+ then ""
+ else "(" ++ unwords cls ++ ")"
+ showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";"
+ in case (dimension dir attr) of
+ Just (Percent a) -> toCss $ show (Percent a)
+ Just dim -> toCss $ showInPixel opts dim ++ "px"
+ Nothing -> Nothing
+ styles = case (showDim Width, showDim Height) of
+ (Just w, Just h) -> "{" ++ w ++ h ++ "}"
+ (Just w, Nothing) -> "{" ++ w ++ "height:auto;}"
+ (Nothing, Just h) -> "{" ++ "width:auto;" ++ h ++ "}"
+ (Nothing, Nothing) -> ""
+ return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!"
+
+inlineToTextile opts (Note contents) = do
+ curNotes <- liftM stNotes get
+ let newnum = length curNotes + 1
+ contents' <- blockListToTextile opts contents
+ let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n"
+ modify $ \s -> s { stNotes = thisnote : curNotes }
+ return $ "[" ++ show newnum ++ "]"
+ -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
new file mode 100644
index 000000000..d01ce0e8b
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -0,0 +1,396 @@
+{-
+Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ZimWiki
+ Copyright : Copyright (C) 2008-2015 John MacFarlane, 2017 Alex Ivkin
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alex Ivkin <alex@ivkin.net>
+ Stability : beta
+ Portability : portable
+
+Conversion of 'Pandoc' documents to ZimWiki markup.
+
+http://zim-wiki.org/manual/Help/Wiki_Syntax.html
+-}
+
+module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) )
+import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr
+ , substitute )
+import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Templates ( renderTemplate' )
+import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf )
+import Data.Text ( breakOnAll, pack )
+import Data.Default (Default(..))
+import Network.URI ( isURI )
+import Control.Monad ( zipWithM )
+import Control.Monad.State ( modify, State, get, evalState )
+import Text.Pandoc.Class ( PandocMonad )
+import qualified Data.Map as Map
+
+data WriterState = WriterState {
+ stItemNum :: Int,
+ stIndent :: String, -- Indent after the marker at the beginning of list items
+ stInTable :: Bool, -- Inside a table
+ stInLink :: Bool -- Inside a link description
+ }
+
+instance Default WriterState where
+ def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False }
+
+-- | Convert Pandoc to ZimWiki.
+writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeZimWiki opts document = return $ evalState (pandocToZimWiki opts document) def
+
+-- | Return ZimWiki representation of document.
+pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String
+pandocToZimWiki opts (Pandoc meta blocks) = do
+ metadata <- metaToJSON opts
+ (fmap trimr . blockListToZimWiki opts)
+ (inlineListToZimWiki opts)
+ meta
+ body <- blockListToZimWiki opts blocks
+ --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n"
+ let main = body
+ let context = defField "body" main
+ $ defField "toc" (writerTableOfContents opts)
+ $ metadata
+ case writerTemplate opts of
+ Just tpl -> return $ renderTemplate' tpl context
+ Nothing -> return main
+
+-- | Escape special characters for ZimWiki.
+escapeString :: String -> String
+escapeString = substitute "__" "''__''" .
+ substitute "**" "''**''" .
+ substitute "~~" "''~~''" .
+ substitute "//" "''//''"
+
+-- | Convert Pandoc block element to ZimWiki.
+blockToZimWiki :: WriterOptions -> Block -> State WriterState String
+
+blockToZimWiki _ Null = return ""
+
+blockToZimWiki opts (Div _attrs bs) = do
+ contents <- blockListToZimWiki opts bs
+ return $ contents ++ "\n"
+
+blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines
+
+-- title beginning with fig: indicates that the image is a figure
+-- ZimWiki doesn't support captions - so combine together alt and caption into alt
+blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return ""
+ else (" " ++) `fmap` inlineListToZimWiki opts txt
+ let opt = if null txt
+ then ""
+ else "|" ++ if null tit then capt else tit ++ capt
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI src then "" else ":"
+ return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n"
+
+blockToZimWiki opts (Para inlines) = do
+ indent <- stIndent <$> get
+ -- useTags <- stUseTags <$> get
+ contents <- inlineListToZimWiki opts inlines
+ return $ contents ++ if null indent then "\n" else ""
+
+blockToZimWiki opts (LineBlock lns) = do
+ blockToZimWiki opts $ linesToPara lns
+
+blockToZimWiki opts (RawBlock f str)
+ | f == Format "zimwiki" = return str
+ | f == Format "html" = do cont <- indentFromHTML opts str; return cont
+ | otherwise = return ""
+
+blockToZimWiki _ HorizontalRule = return "\n----\n"
+
+blockToZimWiki opts (Header level _ inlines) = do
+ contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers
+ let eqs = replicate ( 7 - level ) '='
+ return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
+
+blockToZimWiki _ (CodeBlock (_,classes,_) str) = do
+ -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using
+ let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")]
+ let langmap = Map.fromList langal
+ return $ case classes of
+ [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block
+ (x:_) -> "{{{code: lang=\"" ++
+ (case Map.lookup x langmap of
+ Nothing -> x
+ Just y -> y) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec
+
+blockToZimWiki opts (BlockQuote blocks) = do
+ contents <- blockListToZimWiki opts blocks
+ return $ unlines $ map ("> " ++) $ lines contents
+
+blockToZimWiki opts (Table capt aligns _ headers rows) = do
+ captionDoc <- if null capt
+ then return ""
+ else do
+ c <- inlineListToZimWiki opts capt
+ return $ "" ++ c ++ "\n"
+ headers' <- if all null headers
+ then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0)
+ else mapM (inlineListToZimWiki opts) (map removeFormatting headers) -- emphasis, links etc. are not allowed in table headers
+ rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
+ let widths = map (maximum . map length) $ transpose (headers':rows')
+ let padTo (width, al) s =
+ case (width - length s) of
+ x | x > 0 ->
+ if al == AlignLeft || al == AlignDefault
+ then s ++ replicate x ' '
+ else if al == AlignRight
+ then replicate x ' ' ++ s
+ else replicate (x `div` 2) ' ' ++
+ s ++ replicate (x - x `div` 2) ' '
+ | otherwise -> s
+ let borderCell (width, al) _ =
+ if al == AlignLeft
+ then ":"++ replicate (width-1) '-'
+ else if al == AlignDefault
+ then replicate width '-'
+ else if al == AlignRight
+ then replicate (width-1) '-' ++ ":"
+ else ":" ++ replicate (width-2) '-' ++ ":"
+ let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|"
+ let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|"
+ return $ captionDoc ++
+ (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++
+ unlines (map renderRow rows')
+
+blockToZimWiki opts (BulletList items) = do
+ indent <- stIndent <$> get
+ modify $ \s -> s { stIndent = stIndent s ++ "\t" }
+ contents <- (mapM (listItemToZimWiki opts) items)
+ modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+blockToZimWiki opts (OrderedList _ items) = do
+ indent <- stIndent <$> get
+ modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 }
+ contents <- (mapM (orderedListItemToZimWiki opts) items)
+ modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) }
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+blockToZimWiki opts (DefinitionList items) = do
+ contents <- (mapM (definitionListItemToZimWiki opts) items)
+ return $ vcat contents
+
+definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String
+definitionListItemToZimWiki opts (label, items) = do
+ labelText <- inlineListToZimWiki opts label
+ contents <- mapM (blockListToZimWiki opts) items
+ indent <- stIndent <$> get
+ return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents
+
+-- Auxiliary functions for lists:
+indentFromHTML :: WriterOptions -> String -> State WriterState String
+indentFromHTML _ str = do
+ indent <- stIndent <$> get
+ itemnum <- stItemNum <$> get
+ if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "."
+ else if isInfixOf "</li>" str then return "\n"
+ else if isInfixOf "<li value=" str then do
+ -- poor man's cut
+ let val = drop 10 $ reverse $ drop 1 $ reverse str
+ --let val = take ((length valls) - 2) valls
+ modify $ \s -> s { stItemNum = read val }
+ return ""
+ else if isInfixOf "<ol>" str then do
+ let olcount=countSubStrs "<ol>" str
+ modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 }
+ return ""
+ else if isInfixOf "</ol>" str then do
+ let olcount=countSubStrs "/<ol>" str
+ modify $ \s -> s{ stIndent = drop olcount (stIndent s) }
+ return ""
+ else
+ return ""
+
+countSubStrs :: String -> String -> Int
+countSubStrs sub str = length $ breakOnAll (pack sub) (pack str)
+
+cleanupCode :: String -> String
+cleanupCode = substitute "<nowiki>" "" . substitute "</nowiki>" ""
+
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+-- | Convert bullet list item (list of blocks) to ZimWiki.
+listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+listItemToZimWiki opts items = do
+ contents <- blockListToZimWiki opts items
+ indent <- stIndent <$> get
+ return $ indent ++ "* " ++ contents
+
+-- | Convert ordered list item (list of blocks) to ZimWiki.
+orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+orderedListItemToZimWiki opts items = do
+ contents <- blockListToZimWiki opts items
+ indent <- stIndent <$> get
+ itemnum <- stItemNum <$> get
+ --modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering
+ return $ indent ++ show itemnum ++ ". " ++ contents
+
+-- Auxiliary functions for tables:
+tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String
+tableItemToZimWiki opts align' item = do
+ let mkcell x = (if align' == AlignRight || align' == AlignCenter
+ then " "
+ else "") ++ x ++
+ (if align' == AlignLeft || align' == AlignCenter
+ then " "
+ else "")
+ modify $ \s -> s { stInTable = True }
+ contents <- blockListToZimWiki opts item
+ modify $ \s -> s { stInTable = False }
+ return $ mkcell contents
+
+-- | Convert list of Pandoc block elements to ZimWiki.
+blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String
+blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks
+
+-- | Convert list of Pandoc inline elements to ZimWiki.
+inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst)
+
+-- | Convert Pandoc inline element to ZimWiki.
+inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String
+
+inlineToZimWiki opts (Emph lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "//" ++ contents ++ "//"
+
+inlineToZimWiki opts (Strong lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "**" ++ contents ++ "**"
+
+inlineToZimWiki opts (Strikeout lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "~~" ++ contents ++ "~~"
+
+inlineToZimWiki opts (Superscript lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "^{" ++ contents ++ "}"
+
+inlineToZimWiki opts (Subscript lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "_{" ++ contents ++ "}"
+
+inlineToZimWiki opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "\8216" ++ contents ++ "\8217"
+
+inlineToZimWiki opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToZimWiki opts lst
+ return $ "\8220" ++ contents ++ "\8221"
+
+inlineToZimWiki opts (Span _attrs ils) = inlineListToZimWiki opts ils
+
+inlineToZimWiki opts (SmallCaps lst) = inlineListToZimWiki opts lst
+
+inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst
+
+inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''"
+
+inlineToZimWiki _ (Str str) = do
+ inTable <- stInTable <$> get
+ inLink <- stInLink <$> get
+ if inTable
+ then return $ substitute "|" "\\|" . escapeString $ str
+ else
+ if inLink
+ then return $ str
+ else return $ escapeString str
+
+inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped
+ where delim = case mathType of
+ DisplayMath -> "$$"
+ InlineMath -> "$"
+
+-- | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
+inlineToZimWiki opts (RawInline f str)
+ | f == Format "zimwiki" = return str
+ | f == Format "html" = do cont <- indentFromHTML opts str; return cont
+ | otherwise = return ""
+
+inlineToZimWiki _ LineBreak = do
+ inTable <- stInTable <$> get
+ if inTable
+ then return "\\n"
+ else return "\n"
+
+inlineToZimWiki opts SoftBreak =
+ case writerWrapText opts of
+ WrapNone -> return " "
+ WrapAuto -> return " "
+ WrapPreserve -> return "\n"
+
+inlineToZimWiki _ Space = return " "
+
+inlineToZimWiki opts (Link _ txt (src, _)) = do
+ inTable <- stInTable <$> get
+ modify $ \s -> s { stInLink = True }
+ label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it
+ modify $ \s -> s { stInLink = False }
+ let label'= if inTable
+ then "" -- no label is allowed in a table
+ else "|"++label
+ case txt of
+ [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
+ | escapeURI s == src -> return src
+ _ -> if isURI src
+ then return $ "[[" ++ src ++ label' ++ "]]"
+ else return $ "[[" ++ src' ++ label' ++ "]]"
+ where src' = case src of
+ '/':xs -> xs -- with leading / it's a
+ _ -> src -- link to a help page
+inlineToZimWiki opts (Image attr alt (source, tit)) = do
+ alt' <- inlineListToZimWiki opts alt
+ inTable <- stInTable <$> get
+ let txt = case (tit, alt, inTable) of
+ ("",[], _) -> ""
+ ("", _, False ) -> "|" ++ alt'
+ (_ , _, False ) -> "|" ++ tit
+ (_ , _, True ) -> ""
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI source then "" else ":"
+ return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}"
+
+inlineToZimWiki opts (Note contents) = do
+ -- no concept of notes in zim wiki, use a text block
+ contents' <- blockListToZimWiki opts contents
+ return $ " **{Note:** " ++ trimr contents' ++ "**}**"
+
+imageDims :: WriterOptions -> Attr -> String
+imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr)
+ where
+ toPx = fmap (showInPixel opts) . checkPct
+ checkPct (Just (Percent _)) = Nothing
+ checkPct maybeDim = maybeDim
+ go (Just w) Nothing = "?" ++ w
+ go (Just w) (Just h) = "?" ++ w ++ "x" ++ h
+ go Nothing (Just h) = "?0x" ++ h
+ go Nothing Nothing = ""
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
new file mode 100644
index 000000000..e105aee91
--- /dev/null
+++ b/src/Text/Pandoc/XML.hs
@@ -0,0 +1,115 @@
+{-
+Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.XML
+ Copyright : Copyright (C) 2006-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for escaping and formatting XML.
+-}
+module Text.Pandoc.XML ( escapeCharForXML,
+ escapeStringForXML,
+ inTags,
+ selfClosingTag,
+ inTagsSimple,
+ inTagsIndented,
+ toEntities,
+ fromEntities ) where
+
+import Text.Pandoc.Pretty
+import Data.Char (ord, isAscii, isSpace)
+import Text.HTML.TagSoup.Entity (lookupEntity)
+
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ c -> [c]
+
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML = concatMap escapeCharForXML
+
+-- | Escape newline characters as &#10;
+escapeNls :: String -> String
+escapeNls (x:xs)
+ | x == '\n' = "&#10;" ++ escapeNls xs
+ | otherwise = x : escapeNls xs
+escapeNls [] = []
+
+-- | Return a text object with a string of formatted XML attributes.
+attributeList :: [(String, String)] -> Doc
+attributeList = hcat . map
+ (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
+ escapeNls (escapeStringForXML b) ++ "\""))
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented tagType attribs contents =
+ let openTag = char '<' <> text tagType <> attributeList attribs <>
+ char '>'
+ closeTag = text "</" <> text tagType <> char '>'
+ in if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: String -> [(String, String)] -> Doc
+selfClosingTag tagType attribs =
+ char '<' <> text tagType <> attributeList attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: String -> Doc -> Doc
+inTagsSimple tagType = inTags False tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: String -> Doc -> Doc
+inTagsIndented tagType = inTags True tagType []
+
+-- | Escape all non-ascii characters using numerical entities.
+toEntities :: String -> String
+toEntities [] = ""
+toEntities (c:cs)
+ | isAscii c = c : toEntities cs
+ | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- Unescapes XML entities
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+ case lookupEntity ent' of
+ Just c -> c ++ fromEntities rest
+ Nothing -> '&' : fromEntities xs
+ where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
+ (zs,';':ys) -> (zs,ys)
+ (zs, ys) -> (zs,ys)
+ ent' = case ent of
+ '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug
+ '#':_ -> ent
+ _ -> ent ++ ";"
+
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []