diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-02-20 20:52:00 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-02-20 20:52:00 +0100 |
commit | ce8226f1a7d64da56117d2f7f351e06225a84614 (patch) | |
tree | 9f2d716df0230f5f17372f19b8718dcf86039fd9 /src/Text | |
parent | e86e44b98e592d5a5e4c6b43d9b57b195f091ed9 (diff) | |
parent | 12d96508c62189b4ff8c8b797d34cc9ef177f5ee (diff) | |
download | pandoc-ce8226f1a7d64da56117d2f7f351e06225a84614.tar.gz |
Merge commit '9e52ac6bb02afd7b4ed5dad61021a1fa33051203' as 'data/templates'
Diffstat (limited to 'src/Text')
97 files changed, 0 insertions, 45056 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs deleted file mode 100644 index 47b891eb3..000000000 --- a/src/Text/Pandoc.hs +++ /dev/null @@ -1,380 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, GADTs #-} -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -This helper module exports the main writers, readers, and data -structure definitions from the Pandoc libraries. - -A typical application will chain together a reader and a writer -to convert strings from one format to another. For example, the -following simple program will act as a filter converting markdown -fragments to reStructuredText, using reference-style links instead of -inline links: - -> module Main where -> import Text.Pandoc -> -> markdownToRST :: String -> Either PandocError String -> markdownToRST = -> writeRST def {writerReferenceLinks = True} . readMarkdown def -> -> main = getContents >>= either error return markdownToRST >>= putStrLn - -Note: all of the readers assume that the input text has @'\n'@ -line endings. So if you get your input text from a web form, -you should remove @'\r'@ characters using @filter (/='\r')@. - --} - -module Text.Pandoc - ( - -- * Definitions - module Text.Pandoc.Definition - -- * Generics - , module Text.Pandoc.Generic - -- * Options - , module Text.Pandoc.Options - -- * Logging - , module Text.Pandoc.Logging - -- * Typeclass - , PandocMonad - , runIO - , runPure - , runIOorExplode - , setVerbosity - -- * Error handling - , module Text.Pandoc.Error - -- * Lists of readers and writers - , readers - -- , writers - , writers - -- * Readers: converting /to/ Pandoc format - , Reader (..) - , readDocx - , readOdt - , readMarkdown - , readCommonMark - , readMediaWiki - , readRST - , readOrg - , readLaTeX - , readHtml - , readTextile - , readDocBook - , readOPML - , readHaddock - , readNative - , readJSON - , readTWiki - , readTxt2Tags - , readEPUB - -- * Writers: converting /from/ Pandoc format - , Writer(..) - , writeNative - , writeJSON - , writeMarkdown - , writePlain - , writeRST - , writeLaTeX - , writeBeamer - , writeConTeXt - , writeTexinfo - , writeHtml4 - , writeHtml4String - , writeHtml5 - , writeHtml5String - , writeRevealJs - , writeS5 - , writeSlidy - , writeSlideous - , writeDZSlides - , writeICML - , writeDocbook4 - , writeDocbook5 - , writeOPML - , writeOpenDocument - , writeMan - , writeMediaWiki - , writeDokuWiki - , writeZimWiki - , writeTextile - , writeRTF - , writeODT - , writeDocx - , writeEPUB2 - , writeEPUB3 - , writeFB2 - , writeOrg - , writeAsciiDoc - , writeHaddock - , writeCommonMark - , writeCustom - , writeTEI - -- * Rendering templates and default templates - , module Text.Pandoc.Templates - -- * Miscellaneous - , getReader - , getWriter - , getDefaultExtensions - , pandocVersion - ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Readers.Markdown -import Text.Pandoc.Readers.CommonMark -import Text.Pandoc.Readers.MediaWiki -import Text.Pandoc.Readers.RST -import Text.Pandoc.Readers.Org -import Text.Pandoc.Readers.DocBook -import Text.Pandoc.Readers.OPML -import Text.Pandoc.Readers.LaTeX -import Text.Pandoc.Readers.HTML -import Text.Pandoc.Readers.Textile -import Text.Pandoc.Readers.Native -import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.TWiki -import Text.Pandoc.Readers.Docx -import Text.Pandoc.Readers.Odt -import Text.Pandoc.Readers.Txt2Tags -import Text.Pandoc.Readers.EPUB -import Text.Pandoc.Writers.Native -import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST -import Text.Pandoc.Writers.LaTeX -import Text.Pandoc.Writers.ConTeXt -import Text.Pandoc.Writers.Texinfo -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.ODT -import Text.Pandoc.Writers.Docx -import Text.Pandoc.Writers.EPUB -import Text.Pandoc.Writers.FB2 -import Text.Pandoc.Writers.ICML -import Text.Pandoc.Writers.Docbook -import Text.Pandoc.Writers.OPML -import Text.Pandoc.Writers.OpenDocument -import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF -import Text.Pandoc.Writers.MediaWiki -import Text.Pandoc.Writers.DokuWiki -import Text.Pandoc.Writers.ZimWiki -import Text.Pandoc.Writers.Textile -import Text.Pandoc.Writers.Org -import Text.Pandoc.Writers.AsciiDoc -import Text.Pandoc.Writers.Haddock -import Text.Pandoc.Writers.CommonMark -import Text.Pandoc.Writers.Custom -import Text.Pandoc.Writers.TEI -import Text.Pandoc.Templates -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Shared (safeRead, mapLeft, pandocVersion) -import Text.Pandoc.Error -import Text.Pandoc.Class -import Data.Aeson -import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) -import Text.Parsec -import Text.Parsec.Error -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad.Except (throwError) - -parseFormatSpec :: String - -> Either ParseError (String, Extensions -> Extensions) -parseFormatSpec = parse formatSpec "" - where formatSpec = do - name <- formatName - extMods <- many extMod - return (name, \x -> foldl (flip ($)) x extMods) - formatName = many1 $ noneOf "-+" - extMod = do - polarity <- oneOf "-+" - name <- many $ noneOf "-+" - ext <- case safeRead ("Ext_" ++ name) of - Just n -> return n - Nothing - | name == "lhs" -> return Ext_literate_haskell - | otherwise -> fail $ "Unknown extension: " ++ name - return $ case polarity of - '-' -> disableExtension ext - _ -> enableExtension ext - -data Reader m = StringReader (ReaderOptions -> String -> m Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) - --- | Association list of formats and readers. -readers :: PandocMonad m => [(String, Reader m)] -readers = [ ("native" , StringReader readNative) - ,("json" , StringReader $ \o s -> - case readJSON o s of - Right doc -> return doc - Left _ -> throwError $ PandocParseError "JSON parse error") - ,("markdown" , StringReader readMarkdown) - ,("markdown_strict" , StringReader readMarkdown) - ,("markdown_phpextra" , StringReader readMarkdown) - ,("markdown_github" , StringReader readMarkdown) - ,("markdown_mmd", StringReader readMarkdown) - ,("commonmark" , StringReader readCommonMark) - ,("rst" , StringReader readRST) - ,("mediawiki" , StringReader readMediaWiki) - ,("docbook" , StringReader readDocBook) - ,("opml" , StringReader readOPML) - ,("org" , StringReader readOrg) - ,("textile" , StringReader readTextile) -- TODO : textile+lhs - ,("html" , StringReader readHtml) - ,("latex" , StringReader readLaTeX) - ,("haddock" , StringReader readHaddock) - ,("twiki" , StringReader readTWiki) - ,("docx" , ByteStringReader readDocx) - ,("odt" , ByteStringReader readOdt) - ,("t2t" , StringReader readTxt2Tags) - ,("epub" , ByteStringReader readEPUB) - ] - -data Writer m = StringWriter (WriterOptions -> Pandoc -> m String) - | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) - --- | Association list of formats and writers. -writers :: PandocMonad m => [ ( String, Writer m) ] -writers = [ - ("native" , StringWriter writeNative) - ,("json" , StringWriter $ \o d -> return $ writeJSON o d) - ,("docx" , ByteStringWriter writeDocx) - ,("odt" , ByteStringWriter writeODT) - ,("epub" , ByteStringWriter writeEPUB3) - ,("epub2" , ByteStringWriter writeEPUB2) - ,("epub3" , ByteStringWriter writeEPUB3) - ,("fb2" , StringWriter writeFB2) - ,("html" , StringWriter writeHtml5String) - ,("html4" , StringWriter writeHtml4String) - ,("html5" , StringWriter writeHtml5String) - ,("icml" , StringWriter writeICML) - ,("s5" , StringWriter writeS5) - ,("slidy" , StringWriter writeSlidy) - ,("slideous" , StringWriter writeSlideous) - ,("dzslides" , StringWriter writeDZSlides) - ,("revealjs" , StringWriter writeRevealJs) - ,("docbook" , StringWriter writeDocbook5) - ,("docbook4" , StringWriter writeDocbook4) - ,("docbook5" , StringWriter writeDocbook5) - ,("opml" , StringWriter writeOPML) - ,("opendocument" , StringWriter writeOpenDocument) - ,("latex" , StringWriter writeLaTeX) - ,("beamer" , StringWriter writeBeamer) - ,("context" , StringWriter writeConTeXt) - ,("texinfo" , StringWriter writeTexinfo) - ,("man" , StringWriter writeMan) - ,("markdown" , StringWriter writeMarkdown) - ,("markdown_strict" , StringWriter writeMarkdown) - ,("markdown_phpextra" , StringWriter writeMarkdown) - ,("markdown_github" , StringWriter writeMarkdown) - ,("markdown_mmd" , StringWriter writeMarkdown) - ,("plain" , StringWriter writePlain) - ,("rst" , StringWriter writeRST) - ,("mediawiki" , StringWriter writeMediaWiki) - ,("dokuwiki" , StringWriter writeDokuWiki) - ,("zimwiki" , StringWriter writeZimWiki) - ,("textile" , StringWriter writeTextile) - ,("rtf" , StringWriter writeRTF) - ,("org" , StringWriter writeOrg) - ,("asciidoc" , StringWriter writeAsciiDoc) - ,("haddock" , StringWriter writeHaddock) - ,("commonmark" , StringWriter writeCommonMark) - ,("tei" , StringWriter writeTEI) - ] - -getDefaultExtensions :: String -> Extensions -getDefaultExtensions "markdown_strict" = strictExtensions -getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions -getDefaultExtensions "markdown_mmd" = multimarkdownExtensions -getDefaultExtensions "markdown_github" = githubMarkdownExtensions -getDefaultExtensions "markdown" = pandocExtensions -getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = extensionsFromList - [Ext_citations, - Ext_auto_identifiers] -getDefaultExtensions "html" = extensionsFromList - [Ext_auto_identifiers, - Ext_native_divs, - Ext_native_spans] -getDefaultExtensions "html4" = getDefaultExtensions "html" -getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = extensionsFromList - [Ext_raw_html, - Ext_native_divs, - Ext_native_spans, - Ext_epub_html_exts] -getDefaultExtensions "epub2" = getDefaultExtensions "epub" -getDefaultExtensions "epub3" = getDefaultExtensions "epub" -getDefaultExtensions "latex" = extensionsFromList - [Ext_smart, - Ext_auto_identifiers] -getDefaultExtensions "context" = extensionsFromList - [Ext_smart, - Ext_auto_identifiers] -getDefaultExtensions "textile" = extensionsFromList - [Ext_old_dashes, - Ext_smart, - Ext_raw_html, - Ext_auto_identifiers] -getDefaultExtensions _ = extensionsFromList - [Ext_auto_identifiers] - --- | Retrieve reader based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> Either String (Reader m) -getReader s = - case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (readerName, setExts) -> - case lookup readerName readers of - Nothing -> Left $ "Unknown reader: " ++ readerName - Just (StringReader r) -> Right $ StringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> - r o{ readerExtensions = setExts $ - getDefaultExtensions readerName } - -getWriter :: PandocMonad m => String -> Either String (Writer m) -getWriter s - = case parseFormatSpec s of - Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] - Right (writerName, setExts) -> - case lookup writerName writers of - Nothing -> Left $ "Unknown writer: " ++ writerName - Just (StringWriter r) -> Right $ StringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (ByteStringWriter r) -> Right $ ByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - -readJSON :: ReaderOptions -> String -> Either PandocError Pandoc -readJSON _ = mapLeft PandocParseError . eitherDecode' . UTF8.fromStringLazy - -writeJSON :: WriterOptions -> Pandoc -> String -writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs deleted file mode 100644 index be8f26811..000000000 --- a/src/Text/Pandoc/App.hs +++ /dev/null @@ -1,1444 +0,0 @@ -{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-} -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.App - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley@edu> - Stability : alpha - Portability : portable - -Does a pandoc conversion based on command-line options. --} -module Text.Pandoc.App ( - convertWithOpts - , Opt(..) - , defaultOpts - , parseOptions - , options - ) where -import Text.Pandoc -import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Walk (walk) -import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, - headerShift, err, openURL, safeRead, - readDataFile ) -import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) -import Text.Pandoc.XML ( toEntities ) -import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.SelfContained ( makeSelfContained ) -import Text.Pandoc.Process (pipeProcess) -import Skylighting ( Style, defaultSyntaxMap, Syntax(..) ) -import Text.Printf -import System.Environment ( getEnvironment, getProgName, getArgs ) -import Control.Applicative ((<|>)) -import System.Exit ( ExitCode (..), exitSuccess ) -import System.FilePath -import Data.Char ( toLower, toUpper ) -import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) -import System.Directory ( getAppUserDataDirectory, findExecutable, - doesFileExist, Permissions(..), getPermissions ) -import System.IO ( stdout, stderr ) -import System.IO.Error ( isDoesNotExistError ) -import qualified Control.Exception as E -import Control.Exception.Extensible ( throwIO ) -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad -import Control.Monad.Trans -import Data.Maybe (fromMaybe, isNothing, isJust) -import Data.Foldable (foldrM) -import Network.URI (parseURI, isURI, URI(..)) -import qualified Data.ByteString.Lazy as B -import qualified Data.ByteString as BS -import qualified Data.Map as M -import Data.Aeson (eitherDecode', encode) -import Data.Yaml (decode) -import qualified Data.Yaml as Yaml -import qualified Data.Text as T -import System.Console.GetOpt -import Text.Pandoc.Class (withMediaBag, PandocIO, getLog) -import Paths_pandoc (getDataDir) -#ifndef _WINDOWS -import System.Posix.Terminal (queryTerminal) -import System.Posix.IO (stdOutput) -#endif - -parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt -parseOptions options' defaults = do - rawArgs <- map UTF8.decodeArg <$> getArgs - prg <- getProgName - - let (actions, args, unrecognizedOpts, errors) = - getOpt' Permute options' rawArgs - - let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts - - unless (null errors && null unknownOptionErrors) $ - err 2 $ concat errors ++ unlines unknownOptionErrors ++ - ("Try " ++ prg ++ " --help for more information.") - - -- thread option data structure through all supplied option actions - opts <- foldl (>>=) (return defaults) actions - return (opts{ optInputFiles = args }) - -convertWithOpts :: Opt -> IO () -convertWithOpts opts = do - let args = optInputFiles opts - let outputFile = optOutputFile opts - let filters = optFilters opts - let verbosity = optVerbosity opts - - when (optDumpArgs opts) $ - do UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) args - exitSuccess - - epubStylesheet <- case optEpubStylesheet opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - epubMetadata <- case optEpubMetadata opts of - Nothing -> return Nothing - Just fp -> Just <$> UTF8.readFile fp - - let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" - let mathMethod = - case (optKaTeXJS opts, optKaTeXStylesheet opts) of - (Nothing, _) -> optHTMLMathMethod opts - (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) - - - -- --bibliography implies -F pandoc-citeproc for backwards compatibility: - let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && - optCiteMethod opts `notElem` [Natbib, Biblatex] && - "pandoc-citeproc" `notElem` map takeBaseName filters - let filters' = if needsCiteproc then "pandoc-citeproc" : filters - else filters - - let sources = case args of - [] -> ["-"] - xs | optIgnoreArgs opts -> ["-"] - | otherwise -> xs - - datadir <- case optDataDir opts of - Nothing -> E.catch - (Just <$> getAppUserDataDirectory "pandoc") - (\e -> let _ = (e :: E.SomeException) - in return Nothing) - Just _ -> return $ optDataDir opts - - -- assign reader and writer based on options and filenames - let readerName = case optReader opts of - Nothing -> defaultReaderName - (if any isURI sources - then "html" - else "markdown") sources - Just x -> map toLower x - - let writerName = case optWriter opts of - Nothing -> defaultWriterName outputFile - Just x -> map toLower x - let format = takeWhile (`notElem` ['+','-']) - $ takeFileName writerName -- in case path to lua script - - let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - - let laTeXOutput = format `elem` ["latex", "beamer"] - let conTeXtOutput = format == "context" - let html5Output = format == "html5" || format == "html" - - -- disabling the custom writer for now - writer <- if ".lua" `isSuffixOf` format - -- note: use non-lowercased version writerName - then error "custom writers disabled for now" - else case getWriter writerName of - Left e -> err 9 $ - if format == "pdf" - then e ++ - "\nTo create a pdf with pandoc, use " ++ - "the latex or beamer writer and specify\n" ++ - "an output file with .pdf extension " ++ - "(pandoc -t latex -o filename.pdf)." - else e - Right w -> return (w :: Writer PandocIO) - - -- TODO: we have to get the input and the output into the state for - -- the sake of the text2tags reader. - reader <- case getReader readerName of - Right r -> return (r :: Reader PandocIO) - Left e -> err 7 e' - where e' = case readerName of - "pdf" -> e ++ - "\nPandoc can convert to PDF, but not from PDF." - "doc" -> e ++ - "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." - _ -> e - - let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput - - templ <- case optTemplate opts of - _ | not standalone -> return Nothing - Nothing -> do - deftemp <- getDefaultTemplate datadir format - case deftemp of - Left e -> throwIO e - Right t -> return (Just t) - Just tp -> do - -- strip off extensions - let tp' = case takeExtension tp of - "" -> tp <.> format - _ -> tp - Just <$> E.catch (UTF8.readFile tp') - (\e -> if isDoesNotExistError e - then E.catch - (readDataFileUTF8 datadir - ("templates" </> tp')) - (\e' -> let _ = (e' :: E.SomeException) - in throwIO e') - else throwIO e) - - let addStringAsVariable varname s vars = return $ (varname, s) : vars - - let addContentsAsVariable varname fp vars = do - s <- UTF8.readFile fp - return $ (varname, s) : vars - - -- note: this reverses the list constructed in option parsing, - -- which in turn was reversed from the command-line order, - -- so we end up with the correct order in the variable list: - let withList _ [] vars = return vars - withList f (x:xs) vars = f x vars >>= withList f xs - - variables <- return (optVariables opts) - >>= - withList (addContentsAsVariable "include-before") - (optIncludeBeforeBody opts) - >>= - withList (addContentsAsVariable "include-after") - (optIncludeAfterBody opts) - >>= - withList (addContentsAsVariable "header-includes") - (optIncludeInHeader opts) - >>= - withList (addStringAsVariable "css") (optCss opts) - >>= - maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) - >>= - maybe return (addStringAsVariable "epub-cover-image") - (optEpubCoverImage opts) - >>= - (\vars -> case mathMethod of - LaTeXMathML Nothing -> do - s <- readDataFileUTF8 datadir "LaTeXMathML.js" - return $ ("mathml-script", s) : vars - _ -> return vars) - >>= - (\vars -> if format == "dzslides" - then do - dztempl <- readDataFileUTF8 datadir - ("dzslides" </> "template.html") - let dzline = "<!-- {{{{ dzslides core" - let dzcore = unlines - $ dropWhile (not . (dzline `isPrefixOf`)) - $ lines dztempl - return $ ("dzslides-core", dzcore) : vars - else return vars) - - let sourceURL = case sources of - [] -> Nothing - (x:_) -> case parseURI x of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriQuery = "", - uriFragment = "" } - _ -> Nothing - - let readerOpts = def{ readerStandalone = standalone - , readerColumns = optColumns opts - , readerTabStop = optTabStop opts - , readerIndentedCodeClasses = optIndentedCodeClasses opts - , readerApplyMacros = not laTeXOutput - , readerDefaultImageExtension = - optDefaultImageExtension opts - , readerTrackChanges = optTrackChanges opts - } - - highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts - - let writerOptions = def { writerTemplate = templ, - writerVariables = variables, - writerTabStop = optTabStop opts, - writerTableOfContents = optTableOfContents opts, - writerHTMLMathMethod = mathMethod, - writerIncremental = optIncremental opts, - writerCiteMethod = optCiteMethod opts, - writerNumberSections = optNumberSections opts, - writerNumberOffset = optNumberOffset opts, - writerSectionDivs = optSectionDivs opts, - writerReferenceLinks = optReferenceLinks opts, - writerReferenceLocation = optReferenceLocation opts, - writerDpi = optDpi opts, - writerWrapText = optWrapText opts, - writerColumns = optColumns opts, - writerEmailObfuscation = optEmailObfuscation opts, - writerIdentifierPrefix = optIdentifierPrefix opts, - writerSourceURL = sourceURL, - writerUserDataDir = datadir, - writerHtmlQTags = optHtmlQTags opts, - writerTopLevelDivision = optTopLevelDivision opts, - writerListings = optListings opts, - writerSlideLevel = optSlideLevel opts, - writerHighlightStyle = highlightStyle, - writerSetextHeaders = optSetextHeaders opts, - writerEpubMetadata = epubMetadata, - writerEpubStylesheet = epubStylesheet, - writerEpubFonts = optEpubFonts opts, - writerEpubChapterLevel = optEpubChapterLevel opts, - writerTOCDepth = optTOCDepth opts, - writerReferenceDoc = optReferenceDoc opts, - writerLaTeXArgs = optLaTeXEngineArgs opts - } - - -#ifdef _WINDOWS - let istty = True -#else - istty <- queryTerminal stdOutput -#endif - when (istty && not (isTextFormat format) && outputFile == "-") $ - err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++ - "Specify an output file using the -o option." - - - let transforms = case optBaseHeaderLevel opts of - x | x > 1 -> [headerShift (x - 1)] - | otherwise -> [] - - let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" - then 0 - else optTabStop opts) - - readSources :: (Functor m, MonadIO m) => [FilePath] -> m String - readSources srcs = convertTabs . intercalate "\n" <$> - mapM readSource srcs - - let runIO' :: PandocIO a -> IO a - runIO' f = do - (res, reports) <- runIOorExplode $ do - setVerbosity verbosity - x <- f - rs <- getLog - return (x, rs) - case optLogFile opts of - Nothing -> return () - Just logfile -> B.writeFile logfile (encodeLogMessages reports) - let isWarning msg = messageVerbosity msg == WARNING - when (optFailIfWarnings opts && any isWarning reports) $ - err 3 "Failing because there were warnings." - return res - - let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) - sourceToDoc sources' = - case reader of - StringReader r - | optFileScope opts || readerName == "json" -> do - pairs <- mapM - (readSource >=> withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) - | otherwise -> - readSources sources' >>= withMediaBag . r readerOpts - ByteStringReader r -> do - pairs <- mapM (readFile' >=> - withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) - - runIO' $ do - (doc, media) <- sourceToDoc sources - doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> - return . flip (foldr addMetadata) (optMetadata opts) >=> - applyTransforms transforms >=> - applyFilters datadir filters' [format]) doc - - case writer of - -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile - ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile - StringWriter f - | pdfOutput -> do - -- make sure writer is latex or beamer or context or html5 - unless (laTeXOutput || conTeXtOutput || html5Output) $ - err 47 $ "cannot produce pdf output with " ++ format ++ - " writer" - - let pdfprog = case () of - _ | conTeXtOutput -> "context" - _ | html5Output -> "wkhtmltopdf" - _ -> optLaTeXEngine opts - -- check for pdf creating program - mbPdfProg <- liftIO $ findExecutable pdfprog - when (isNothing mbPdfProg) $ - err 41 $ pdfprog ++ " not found. " ++ - pdfprog ++ " is needed for pdf output." - - res <- makePDF pdfprog f writerOptions verbosity media doc' - case res of - Right pdf -> writeFnBinary outputFile pdf - Left err' -> liftIO $ do - B.hPutStr stderr err' - B.hPut stderr $ B.pack [10] - err 43 "Error producing PDF" - | otherwise -> do - let htmlFormat = format `elem` - ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] - selfcontain = if optSelfContained opts && htmlFormat - then makeSelfContained writerOptions media - else return - handleEntities = if htmlFormat && optAscii opts - then toEntities - else id - output <- f writerOptions doc' - selfcontain (output ++ ['\n' | not standalone]) >>= - writerFn outputFile . handleEntities - -type Transform = Pandoc -> Pandoc - -isTextFormat :: String -> Bool -isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] - -externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc -externalFilter f args' d = liftIO $ do - exists <- doesFileExist f - isExecutable <- if exists - then executable <$> getPermissions f - else return True - let (f', args'') = if exists - then case map toLower (takeExtension f) of - _ | isExecutable -> ("." </> f, args') - ".py" -> ("python", f:args') - ".hs" -> ("runhaskell", f:args') - ".pl" -> ("perl", f:args') - ".rb" -> ("ruby", f:args') - ".php" -> ("php", f:args') - ".js" -> ("node", f:args') - _ -> (f, args') - else (f, args') - unless (exists && isExecutable) $ do - mbExe <- findExecutable f' - when (isNothing mbExe) $ - err 83 $ "Error running filter " ++ f ++ ":\n" ++ - "Could not find executable '" ++ f' ++ "'." - env <- getEnvironment - let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env - (exitcode, outbs) <- E.handle filterException $ - pipeProcess env' f' args'' $ encode d - case exitcode of - ExitSuccess -> return $ either error id $ eitherDecode' outbs - ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ - "Filter returned error status " ++ show ec - where filterException :: E.SomeException -> IO a - filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ - show e - --- | Data structure for command line options. -data Opt = Opt - { optTabStop :: Int -- ^ Number of spaces per tab - , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces - , optStandalone :: Bool -- ^ Include header, footer - , optReader :: Maybe String -- ^ Reader format - , optWriter :: Maybe String -- ^ Writer format - , optTableOfContents :: Bool -- ^ Include table of contents - , optBaseHeaderLevel :: Int -- ^ Base header level - , optTemplate :: Maybe FilePath -- ^ Custom template - , optVariables :: [(String,String)] -- ^ Template variables to set - , optMetadata :: [(String, String)] -- ^ Metadata fields to set - , optOutputFile :: FilePath -- ^ Name of output file - , optInputFiles :: [FilePath] -- ^ Names of input files - , optNumberSections :: Bool -- ^ Number sections in LaTeX - , optNumberOffset :: [Int] -- ^ Starting number for sections - , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 - , optSelfContained :: Bool -- ^ Make HTML accessible offline - , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML - , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code - , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions - , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math - , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc - , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet - , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata - , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed - , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters - , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub - , optTOCDepth :: Int -- ^ Number of levels to include in TOC - , optDumpArgs :: Bool -- ^ Output command-line arguments - , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output - , optLogFile :: Maybe FilePath -- ^ File to write JSON log output - , optFailIfWarnings :: Bool -- ^ Fail on warnings - , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output - , optDpi :: Int -- ^ Dpi - , optWrapText :: WrapOption -- ^ Options for wrapping text - , optColumns :: Int -- ^ Line length in characters - , optFilters :: [FilePath] -- ^ Filters to apply - , optEmailObfuscation :: ObfuscationMethod - , optIdentifierPrefix :: String - , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks - , optDataDir :: Maybe FilePath - , optCiteMethod :: CiteMethod -- ^ Method to output cites - , optListings :: Bool -- ^ Use listings package for code blocks - , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf - , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine - , optSlideLevel :: Maybe Int -- ^ Header level that creates slides - , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 - , optAscii :: Bool -- ^ Use ascii characters only in html - , optDefaultImageExtension :: String -- ^ Default image extension - , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media - , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. - , optFileScope :: Bool -- ^ Parse input files before combining - , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX - , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX - , optTitlePrefix :: Maybe String -- ^ Prefix for title - , optCss :: [FilePath] -- ^ CSS files to link to - , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before - , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body - , optIncludeInHeader :: [FilePath] -- ^ Files to include in header - } - --- | Defaults for command-line options. -defaultOpts :: Opt -defaultOpts = Opt - { optTabStop = 4 - , optPreserveTabs = False - , optStandalone = False - , optReader = Nothing - , optWriter = Nothing - , optTableOfContents = False - , optBaseHeaderLevel = 1 - , optTemplate = Nothing - , optVariables = [] - , optMetadata = [] - , optOutputFile = "-" -- "-" means stdout - , optInputFiles = [] - , optNumberSections = False - , optNumberOffset = [0,0,0,0,0,0] - , optSectionDivs = False - , optIncremental = False - , optSelfContained = False - , optHtmlQTags = False - , optHighlightStyle = Just "pygments" - , optTopLevelDivision = TopLevelDefault - , optHTMLMathMethod = PlainMath - , optReferenceDoc = Nothing - , optEpubStylesheet = Nothing - , optEpubMetadata = Nothing - , optEpubFonts = [] - , optEpubChapterLevel = 1 - , optEpubCoverImage = Nothing - , optTOCDepth = 3 - , optDumpArgs = False - , optIgnoreArgs = False - , optVerbosity = WARNING - , optLogFile = Nothing - , optFailIfWarnings = False - , optReferenceLinks = False - , optReferenceLocation = EndOfDocument - , optDpi = 96 - , optWrapText = WrapAuto - , optColumns = 72 - , optFilters = [] - , optEmailObfuscation = NoObfuscation - , optIdentifierPrefix = "" - , optIndentedCodeClasses = [] - , optDataDir = Nothing - , optCiteMethod = Citeproc - , optListings = False - , optLaTeXEngine = "pdflatex" - , optLaTeXEngineArgs = [] - , optSlideLevel = Nothing - , optSetextHeaders = True - , optAscii = False - , optDefaultImageExtension = "" - , optExtractMedia = Nothing - , optTrackChanges = AcceptChanges - , optFileScope = False - , optKaTeXStylesheet = Nothing - , optKaTeXJS = Nothing - , optTitlePrefix = Nothing - , optCss = [] - , optIncludeBeforeBody = [] - , optIncludeAfterBody = [] - , optIncludeInHeader = [] - } - -addMetadata :: (String, String) -> Pandoc -> Pandoc -addMetadata (k, v) (Pandoc meta bs) = Pandoc meta' bs - where meta' = case lookupMeta k meta of - Nothing -> setMeta k v' meta - Just (MetaList xs) -> - setMeta k (MetaList (xs ++ [v'])) meta - Just x -> setMeta k (MetaList [x, v']) meta - v' = readMetaValue v - -readMetaValue :: String -> MetaValue -readMetaValue s = case decode (UTF8.fromString s) of - Just (Yaml.String t) -> MetaString $ T.unpack t - Just (Yaml.Bool b) -> MetaBool b - _ -> MetaString s - --- Determine default reader based on source file extensions -defaultReaderName :: String -> [FilePath] -> String -defaultReaderName fallback [] = fallback -defaultReaderName fallback (x:xs) = - case takeExtension (map toLower x) of - ".xhtml" -> "html" - ".html" -> "html" - ".htm" -> "html" - ".md" -> "markdown" - ".markdown" -> "markdown" - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".rst" -> "rst" - ".org" -> "org" - ".lhs" -> "markdown+lhs" - ".db" -> "docbook" - ".opml" -> "opml" - ".wiki" -> "mediawiki" - ".dokuwiki" -> "dokuwiki" - ".textile" -> "textile" - ".native" -> "native" - ".json" -> "json" - ".docx" -> "docx" - ".t2t" -> "t2t" - ".epub" -> "epub" - ".odt" -> "odt" - ".pdf" -> "pdf" -- so we get an "unknown reader" error - ".doc" -> "doc" -- so we get an "unknown reader" error - _ -> defaultReaderName fallback xs - --- Determine default writer based on output file extension -defaultWriterName :: FilePath -> String -defaultWriterName "-" = "html" -- no output file -defaultWriterName x = - case takeExtension (map toLower x) of - "" -> "markdown" -- empty extension - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".context" -> "context" - ".ctx" -> "context" - ".rtf" -> "rtf" - ".rst" -> "rst" - ".s5" -> "s5" - ".native" -> "native" - ".json" -> "json" - ".txt" -> "markdown" - ".text" -> "markdown" - ".md" -> "markdown" - ".markdown" -> "markdown" - ".textile" -> "textile" - ".lhs" -> "markdown+lhs" - ".texi" -> "texinfo" - ".texinfo" -> "texinfo" - ".db" -> "docbook" - ".odt" -> "odt" - ".docx" -> "docx" - ".epub" -> "epub" - ".org" -> "org" - ".asciidoc" -> "asciidoc" - ".adoc" -> "asciidoc" - ".pdf" -> "latex" - ".fb2" -> "fb2" - ".opml" -> "opml" - ".icml" -> "icml" - ".tei.xml" -> "tei" - ".tei" -> "tei" - ['.',y] | y `elem` ['1'..'9'] -> "man" - _ -> "html" - --- Transformations of a Pandoc document post-parsing: - -extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc -extractMedia media dir d = - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - extractMediaBag True dir media - return $ walk (adjustImagePath dir fps) d - -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) -adjustImagePath _ _ x = x - -applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc -applyTransforms transforms d = return $ foldr ($) d transforms - - -- First we check to see if a filter is found. If not, and if it's - -- not an absolute path, we check to see whether it's in `userdir/filters`. - -- If not, we leave it unchanged. -expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath -expandFilterPath mbDatadir fp = liftIO $ do - fpExists <- doesFileExist fp - if fpExists - then return fp - else case mbDatadir of - Just datadir | isRelative fp -> do - let filterPath = (datadir </> "filters" </> fp) - filterPathExists <- doesFileExist filterPath - if filterPathExists - then return filterPath - else return fp - _ -> return fp - -applyFilters :: MonadIO m - => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc -applyFilters mbDatadir filters args d = do - expandedFilters <- mapM (expandFilterPath mbDatadir) filters - foldrM ($) d $ map (flip externalFilter args) expandedFilters - -readSource :: MonadIO m => FilePath -> m String -readSource "-" = liftIO UTF8.getContents -readSource src = case parseURI src of - Just u | uriScheme u `elem` ["http:","https:"] -> - readURI src - | uriScheme u == "file:" -> - liftIO $ UTF8.readFile (uriPath u) - _ -> liftIO $ UTF8.readFile src - -readURI :: MonadIO m => FilePath -> m String -readURI src = do - res <- liftIO $ openURL src - case res of - Left e -> liftIO $ throwIO e - Right (bs,_) -> return $ UTF8.toString bs - -readFile' :: MonadIO m => FilePath -> m B.ByteString -readFile' "-" = liftIO $ B.getContents -readFile' f = liftIO $ B.readFile f - -writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () -writeFnBinary "-" = liftIO . B.putStr -writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) - -writerFn :: MonadIO m => FilePath -> String -> m () -writerFn "-" = liftIO . UTF8.putStr -writerFn f = liftIO . UTF8.writeFile f - -lookupHighlightStyle :: Maybe String -> IO (Maybe Style) -lookupHighlightStyle Nothing = return Nothing -lookupHighlightStyle (Just s) = - case lookup (map toLower s) highlightingStyles of - Just sty -> return (Just sty) - Nothing -> err 68 $ "Unknown highlight-style " ++ s - --- | A list of functions, each transforming the options data structure --- in response to a command-line option. -options :: [OptDescr (Opt -> IO Opt)] -options = - [ Option "fr" ["from","read"] - (ReqArg - (\arg opt -> return opt { optReader = Just arg }) - "FORMAT") - "" - - , Option "tw" ["to","write"] - (ReqArg - (\arg opt -> return opt { optWriter = Just arg }) - "FORMAT") - "" - - , Option "o" ["output"] - (ReqArg - (\arg opt -> return opt { optOutputFile = arg }) - "FILENAME") - "" -- "Name of output file" - - , Option "" ["data-dir"] - (ReqArg - (\arg opt -> return opt { optDataDir = Just arg }) - "DIRECTORY") -- "Directory containing pandoc data files." - "" - - , Option "" ["base-header-level"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 && t < 6 -> do - return opt{ optBaseHeaderLevel = t } - _ -> err 19 - "base-header-level must be 1-5") - "NUMBER") - "" -- "Headers base level" - - , Option "" ["indented-code-classes"] - (ReqArg - (\arg opt -> return opt { optIndentedCodeClasses = words $ - map (\c -> if c == ',' then ' ' else c) arg }) - "STRING") - "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" - - , Option "F" ["filter"] - (ReqArg - (\arg opt -> return opt { optFilters = arg : optFilters opt }) - "PROGRAM") - "" -- "External JSON filter" - - , Option "p" ["preserve-tabs"] - (NoArg - (\opt -> return opt { optPreserveTabs = True })) - "" -- "Preserve tabs instead of converting to spaces" - - , Option "" ["tab-stop"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 -> return opt { optTabStop = t } - _ -> err 31 - "tab-stop must be a number greater than 0") - "NUMBER") - "" -- "Tab stop (default 4)" - - , Option "" ["track-changes"] - (ReqArg - (\arg opt -> do - action <- case arg of - "accept" -> return AcceptChanges - "reject" -> return RejectChanges - "all" -> return AllChanges - _ -> err 6 - ("Unknown option for track-changes: " ++ arg) - return opt { optTrackChanges = action }) - "accept|reject|all") - "" -- "Accepting or reject MS Word track-changes."" - - , Option "" ["file-scope"] - (NoArg - (\opt -> return opt { optFileScope = True })) - "" -- "Parse input files before combining" - - , Option "" ["extract-media"] - (ReqArg - (\arg opt -> - return opt { optExtractMedia = Just arg }) - "PATH") - "" -- "Directory to which to extract embedded media" - - , Option "s" ["standalone"] - (NoArg - (\opt -> return opt { optStandalone = True })) - "" -- "Include needed header and footer on output" - - , Option "" ["template"] - (ReqArg - (\arg opt -> - return opt{ optTemplate = Just arg, - optStandalone = True }) - "FILENAME") - "" -- "Use custom template" - - , Option "M" ["metadata"] - (ReqArg - (\arg opt -> do - let (key, val) = splitField arg - return opt{ optMetadata = (key, val) : optMetadata opt }) - "KEY[:VALUE]") - "" - - , Option "V" ["variable"] - (ReqArg - (\arg opt -> do - let (key, val) = splitField arg - return opt{ optVariables = (key, val) : optVariables opt }) - "KEY[:VALUE]") - "" - - , Option "D" ["print-default-template"] - (ReqArg - (\arg _ -> do - templ <- getDefaultTemplate Nothing arg - case templ of - Right t -> UTF8.hPutStr stdout t - Left e -> error $ show e - exitSuccess) - "FORMAT") - "" -- "Print default template for FORMAT" - - , Option "" ["print-default-data-file"] - (ReqArg - (\arg _ -> do - readDataFile Nothing arg >>= BS.hPutStr stdout - exitSuccess) - "FILE") - "" -- "Print default data file" - - , Option "" ["dpi"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 -> return opt { optDpi = t } - _ -> err 31 - "dpi must be a number greater than 0") - "NUMBER") - "" -- "Dpi (default 96)" - - , Option "" ["wrap"] - (ReqArg - (\arg opt -> - case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of - Just o -> return opt { optWrapText = o } - Nothing -> err 77 "--wrap must be auto, none, or preserve") - "auto|none|preserve") - "" -- "Option for wrapping text in output" - - , Option "" ["columns"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 -> return opt { optColumns = t } - _ -> err 33 - "columns must be a number greater than 0") - "NUMBER") - "" -- "Length of line in characters" - - , Option "" ["toc", "table-of-contents"] - (NoArg - (\opt -> return opt { optTableOfContents = True })) - "" -- "Include table of contents" - - , Option "" ["toc-depth"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t >= 1 && t <= 6 -> - return opt { optTOCDepth = t } - _ -> err 57 - "TOC level must be a number between 1 and 6") - "NUMBER") - "" -- "Number of levels to include in TOC" - - , Option "" ["no-highlight"] - (NoArg - (\opt -> return opt { optHighlightStyle = Nothing })) - "" -- "Don't highlight source code" - - , Option "" ["highlight-style"] - (ReqArg - (\arg opt -> return opt{ optHighlightStyle = Just arg }) - "STYLE") - "" -- "Style for highlighted code" - - , Option "H" ["include-in-header"] - (ReqArg - (\arg opt -> return opt{ optIncludeInHeader = - arg : optIncludeInHeader opt, - optStandalone = True }) - "FILENAME") - "" -- "File to include at end of header (implies -s)" - - , Option "B" ["include-before-body"] - (ReqArg - (\arg opt -> return opt{ optIncludeBeforeBody = - arg : optIncludeBeforeBody opt, - optStandalone = True }) - "FILENAME") - "" -- "File to include before document body" - - , Option "A" ["include-after-body"] - (ReqArg - (\arg opt -> return opt{ optIncludeAfterBody = - arg : optIncludeAfterBody opt, - optStandalone = True }) - "FILENAME") - "" -- "File to include after document body" - - , Option "" ["self-contained"] - (NoArg - (\opt -> return opt { optSelfContained = True, - optStandalone = True })) - "" -- "Make slide shows include all the needed js and css" - - , Option "" ["html-q-tags"] - (NoArg - (\opt -> - return opt { optHtmlQTags = True })) - "" -- "Use <q> tags for quotes in HTML" - - , Option "" ["ascii"] - (NoArg - (\opt -> return opt { optAscii = True })) - "" -- "Use ascii characters only in HTML output" - - , Option "" ["reference-links"] - (NoArg - (\opt -> return opt { optReferenceLinks = True } )) - "" -- "Use reference links in parsing HTML" - - , Option "" ["reference-location"] - (ReqArg - (\arg opt -> do - action <- case arg of - "block" -> return EndOfBlock - "section" -> return EndOfSection - "document" -> return EndOfDocument - _ -> err 6 - ("Unknown option for reference-location: " ++ arg) - return opt { optReferenceLocation = action }) - "block|section|document") - "" -- "Accepting or reject MS Word track-changes."" - - , Option "" ["atx-headers"] - (NoArg - (\opt -> return opt { optSetextHeaders = False } )) - "" -- "Use atx-style headers for markdown" - - , Option "" ["top-level-division"] - (ReqArg - (\arg opt -> do - let tldName = "TopLevel" ++ uppercaseFirstLetter arg - case safeRead tldName of - Just tlDiv -> return opt { optTopLevelDivision = tlDiv } - _ -> err 76 ("Top-level division must be " ++ - "section, chapter, part, or default")) - "section|chapter|part") - "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook" - - , Option "N" ["number-sections"] - (NoArg - (\opt -> return opt { optNumberSections = True })) - "" -- "Number sections in LaTeX" - - , Option "" ["number-offset"] - (ReqArg - (\arg opt -> - case safeRead ('[':arg ++ "]") of - Just ns -> return opt { optNumberOffset = ns, - optNumberSections = True } - _ -> err 57 "could not parse number-offset") - "NUMBERS") - "" -- "Starting number for sections, subsections, etc." - - , Option "" ["listings"] - (NoArg - (\opt -> return opt { optListings = True })) - "" -- "Use listings package for LaTeX code blocks" - - , Option "i" ["incremental"] - (NoArg - (\opt -> return opt { optIncremental = True })) - "" -- "Make list items display incrementally in Slidy/Slideous/S5" - - , Option "" ["slide-level"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t >= 1 && t <= 6 -> - return opt { optSlideLevel = Just t } - _ -> err 39 - "slide level must be a number between 1 and 6") - "NUMBER") - "" -- "Force header level for slides" - - , Option "" ["section-divs"] - (NoArg - (\opt -> return opt { optSectionDivs = True })) - "" -- "Put sections in div tags in HTML" - - , Option "" ["default-image-extension"] - (ReqArg - (\arg opt -> return opt { optDefaultImageExtension = arg }) - "extension") - "" -- "Default extension for extensionless images" - - , Option "" ["email-obfuscation"] - (ReqArg - (\arg opt -> do - method <- case arg of - "references" -> return ReferenceObfuscation - "javascript" -> return JavascriptObfuscation - "none" -> return NoObfuscation - _ -> err 6 - ("Unknown obfuscation method: " ++ arg) - return opt { optEmailObfuscation = method }) - "none|javascript|references") - "" -- "Method for obfuscating email in HTML" - - , Option "" ["id-prefix"] - (ReqArg - (\arg opt -> return opt { optIdentifierPrefix = arg }) - "STRING") - "" -- "Prefix to add to automatically generated HTML identifiers" - - , Option "T" ["title-prefix"] - (ReqArg - (\arg opt -> do - let newvars = ("title-prefix", arg) : optVariables opt - return opt { optVariables = newvars, - optStandalone = True }) - "STRING") - "" -- "String to prefix to HTML window title" - - , Option "c" ["css"] - (ReqArg - (\arg opt -> return opt{ optCss = arg : optCss opt }) - -- add new link to end, so it is included in proper order - "URL") - "" -- "Link to CSS style sheet" - - , Option "" ["reference-doc"] - (ReqArg - (\arg opt -> - return opt { optReferenceDoc = Just arg }) - "FILENAME") - "" -- "Path of custom reference doc" - - , Option "" ["epub-stylesheet"] - (ReqArg - (\arg opt -> return opt { optEpubStylesheet = Just arg }) - "FILENAME") - "" -- "Path of epub.css" - - , Option "" ["epub-cover-image"] - (ReqArg - (\arg opt -> - return opt { optVariables = - ("epub-cover-image", arg) : optVariables opt }) - "FILENAME") - "" -- "Path of epub cover image" - - , Option "" ["epub-metadata"] - (ReqArg - (\arg opt -> return opt { optEpubMetadata = Just arg }) - "FILENAME") - "" -- "Path of epub metadata file" - - , Option "" ["epub-embed-font"] - (ReqArg - (\arg opt -> - return opt{ optEpubFonts = arg : optEpubFonts opt }) - "FILE") - "" -- "Directory of fonts to embed" - - , Option "" ["epub-chapter-level"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t >= 1 && t <= 6 -> - return opt { optEpubChapterLevel = t } - _ -> err 59 - "chapter level must be a number between 1 and 6") - "NUMBER") - "" -- "Header level at which to split chapters in EPUB" - - , Option "" ["latex-engine"] - (ReqArg - (\arg opt -> do - let b = takeBaseName arg - if b `elem` ["pdflatex", "lualatex", "xelatex"] - then return opt { optLaTeXEngine = arg } - else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") - "PROGRAM") - "" -- "Name of latex program to use in generating PDF" - - , Option "" ["latex-engine-opt"] - (ReqArg - (\arg opt -> do - let oldArgs = optLaTeXEngineArgs opt - return opt { optLaTeXEngineArgs = arg : oldArgs }) - "STRING") - "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used" - - , Option "" ["bibliography"] - (ReqArg - (\arg opt -> return opt{ optMetadata = - ("bibliography", arg) : optMetadata opt }) - "FILE") - "" - - , Option "" ["csl"] - (ReqArg - (\arg opt -> - return opt{ optMetadata = - ("csl", arg) : optMetadata opt }) - "FILE") - "" - - , Option "" ["citation-abbreviations"] - (ReqArg - (\arg opt -> - return opt{ optMetadata = - ("citation-abbreviations", arg): optMetadata opt }) - "FILE") - "" - - , Option "" ["natbib"] - (NoArg - (\opt -> return opt { optCiteMethod = Natbib })) - "" -- "Use natbib cite commands in LaTeX output" - - , Option "" ["biblatex"] - (NoArg - (\opt -> return opt { optCiteMethod = Biblatex })) - "" -- "Use biblatex cite commands in LaTeX output" - - , Option "m" ["latexmathml", "asciimathml"] - (OptArg - (\arg opt -> - return opt { optHTMLMathMethod = LaTeXMathML arg }) - "URL") - "" -- "Use LaTeXMathML script in html output" - - , Option "" ["mathml"] - (NoArg - (\opt -> - return opt { optHTMLMathMethod = MathML })) - "" -- "Use mathml for HTML math" - - , Option "" ["mimetex"] - (OptArg - (\arg opt -> do - let url' = case arg of - Just u -> u ++ "?" - Nothing -> "/cgi-bin/mimetex.cgi?" - return opt { optHTMLMathMethod = WebTeX url' }) - "URL") - "" -- "Use mimetex for HTML math" - - , Option "" ["webtex"] - (OptArg - (\arg opt -> do - let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg - return opt { optHTMLMathMethod = WebTeX url' }) - "URL") - "" -- "Use web service for HTML math" - - , Option "" ["jsmath"] - (OptArg - (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) - "URL") - "" -- "Use jsMath for HTML math" - - , Option "" ["mathjax"] - (OptArg - (\arg opt -> do - let url' = fromMaybe "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML-full" arg - return opt { optHTMLMathMethod = MathJax url'}) - "URL") - "" -- "Use MathJax for HTML math" - , Option "" ["katex"] - (OptArg - (\arg opt -> - return opt - { optKaTeXJS = - arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"}) - "URL") - "" -- Use KaTeX for HTML Math - - , Option "" ["katex-stylesheet"] - (ReqArg - (\arg opt -> - return opt { optKaTeXStylesheet = Just arg }) - "URL") - "" -- Set the KaTeX Stylesheet location - - , Option "" ["gladtex"] - (NoArg - (\opt -> return opt { optHTMLMathMethod = GladTeX })) - "" -- "Use gladtex for HTML math" - - , Option "" ["trace"] - (NoArg - (\opt -> return opt { optVerbosity = DEBUG })) - "" -- "Turn on diagnostic tracing in readers." - - , Option "" ["dump-args"] - (NoArg - (\opt -> return opt { optDumpArgs = True })) - "" -- "Print output filename and arguments to stdout." - - , Option "" ["ignore-args"] - (NoArg - (\opt -> return opt { optIgnoreArgs = True })) - "" -- "Ignore command-line arguments." - - , Option "" ["verbose"] - (NoArg - (\opt -> return opt { optVerbosity = INFO })) - "" -- "Verbose diagnostic output." - - , Option "" ["quiet"] - (NoArg - (\opt -> return opt { optVerbosity = ERROR })) - "" -- "Suppress warnings." - - , Option "" ["fail-if-warnings"] - (NoArg - (\opt -> return opt { optFailIfWarnings = True })) - "" -- "Exit with error status if there were warnings." - - , Option "" ["log"] - (ReqArg - (\arg opt -> return opt{ optLogFile = Just arg }) - "FILE") - "" -- "Log messages in JSON format to this file." - - , Option "" ["bash-completion"] - (NoArg - (\_ -> do - ddir <- getDataDir - tpl <- readDataFileUTF8 Nothing "bash_completion.tpl" - let optnames (Option shorts longs _ _) = - map (\c -> ['-',c]) shorts ++ - map ("--" ++) longs - let allopts = unwords (concatMap optnames options) - UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords readers'names) - (unwords writers'names) - (unwords $ map fst highlightingStyles) - ddir - exitSuccess )) - "" -- "Print bash completion script" - - , Option "" ["list-input-formats"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) readers'names - exitSuccess )) - "" - - , Option "" ["list-output-formats"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) writers'names - exitSuccess )) - "" - - , Option "" ["list-extensions"] - (NoArg - (\_ -> do - let showExt x = drop 4 (show x) ++ - if extensionEnabled x pandocExtensions - then " +" - else " -" - mapM_ (UTF8.hPutStrLn stdout . showExt) - ([minBound..maxBound] :: [Extension]) - exitSuccess )) - "" - - , Option "" ["list-highlight-languages"] - (NoArg - (\_ -> do - let langs = [ T.unpack (T.toLower (sShortname s)) - | s <- M.elems defaultSyntaxMap - , sShortname s `notElem` - [T.pack "Alert", T.pack "Alert_indent"] - ] - mapM_ (UTF8.hPutStrLn stdout) langs - exitSuccess )) - "" - - , Option "" ["list-highlight-styles"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) $ - map fst highlightingStyles - exitSuccess )) - "" - - , Option "v" ["version"] - (NoArg - (\_ -> do - prg <- getProgName - defaultDatadir <- E.catch - (getAppUserDataDirectory "pandoc") - (\e -> let _ = (e :: E.SomeException) - in return "") - UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ - compileInfo ++ "\nDefault user data directory: " ++ - defaultDatadir ++ copyrightMessage) - exitSuccess )) - "" -- "Print version" - - , Option "h" ["help"] - (NoArg - (\_ -> do - prg <- getProgName - UTF8.hPutStr stdout (usageMessage prg options) - exitSuccess )) - "" -- "Show help" - - ] - --- Returns usage message -usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String -usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]") - -copyrightMessage :: String -copyrightMessage = intercalate "\n" [ - "", - "Copyright (C) 2006-2017 John MacFarlane", - "Web: http://pandoc.org", - "This is free software; see the source for copying conditions.", - "There is no warranty, not even for merchantability or fitness", - "for a particular purpose." ] - -compileInfo :: String -compileInfo = - "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ - VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting - -handleUnrecognizedOption :: String -> [String] -> [String] -handleUnrecognizedOption "--smart" = - (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++ - "For example: pandoc -f markdown+smart -t markdown-smart.") :) -handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart" -handleUnrecognizedOption "--old-dashes" = - ("--old-dashes has been removed. Use +old_dashes extension instead." :) -handleUnrecognizedOption "--no-wrap" = - ("--no-wrap has been removed. Use --wrap=none instead." :) -handleUnrecognizedOption "--chapters" = - ("--chapters has been removed. Use --top-level-division=chapter instead." :) -handleUnrecognizedOption "--reference-docx" = - ("--reference-docx has been removed. Use --reference-doc instead." :) -handleUnrecognizedOption "--reference-odt" = - ("--reference-odt has been removed. Use --reference-doc instead." :) -handleUnrecognizedOption "--parse-raw" = - (("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n") :) -handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw" -handleUnrecognizedOption x = - (("Unknown option " ++ x ++ ".") :) - -uppercaseFirstLetter :: String -> String -uppercaseFirstLetter (c:cs) = toUpper c : cs -uppercaseFirstLetter [] = [] - -readers'names :: [String] -readers'names = sort (map fst (readers :: [(String, Reader PandocIO)])) - -writers'names :: [String] -writers'names = sort (map fst (writers :: [(String, Writer PandocIO)])) - -splitField :: String -> (String, String) -splitField s = - case break (`elem` ":=") s of - (k,_:v) -> (k,v) - (k,[]) -> (k,"true") - diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs deleted file mode 100644 index 8eb1ba663..000000000 --- a/src/Text/Pandoc/Asciify.hs +++ /dev/null @@ -1,422 +0,0 @@ -{- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Function to convert accented latin letters to their unaccented -ascii equivalents (used in constructing HTML identifiers). --} -module Text.Pandoc.Asciify (toAsciiChar) -where -import qualified Data.Map as M -import Data.Char (isAscii) - -toAsciiChar :: Char -> Maybe Char -toAsciiChar c | isAscii c = Just c - | otherwise = M.lookup c asciiMap - -asciiMap :: M.Map Char Char -asciiMap = M.fromList - [('\192','A') - ,('\193','A') - ,('\194','A') - ,('\195','A') - ,('\196','A') - ,('\197','A') - ,('\199','C') - ,('\200','E') - ,('\201','E') - ,('\202','E') - ,('\203','E') - ,('\204','I') - ,('\205','I') - ,('\206','I') - ,('\207','I') - ,('\209','N') - ,('\210','O') - ,('\211','O') - ,('\212','O') - ,('\213','O') - ,('\214','O') - ,('\217','U') - ,('\218','U') - ,('\219','U') - ,('\220','U') - ,('\221','Y') - ,('\224','a') - ,('\225','a') - ,('\226','a') - ,('\227','a') - ,('\228','a') - ,('\229','a') - ,('\231','c') - ,('\232','e') - ,('\233','e') - ,('\234','e') - ,('\235','e') - ,('\236','i') - ,('\237','i') - ,('\238','i') - ,('\239','i') - ,('\241','n') - ,('\242','o') - ,('\243','o') - ,('\244','o') - ,('\245','o') - ,('\246','o') - ,('\249','u') - ,('\250','u') - ,('\251','u') - ,('\252','u') - ,('\253','y') - ,('\255','y') - ,('\256','A') - ,('\257','a') - ,('\258','A') - ,('\259','a') - ,('\260','A') - ,('\261','a') - ,('\262','C') - ,('\263','c') - ,('\264','C') - ,('\265','c') - ,('\266','C') - ,('\267','c') - ,('\268','C') - ,('\269','c') - ,('\270','D') - ,('\271','d') - ,('\274','E') - ,('\275','e') - ,('\276','E') - ,('\277','e') - ,('\278','E') - ,('\279','e') - ,('\280','E') - ,('\281','e') - ,('\282','E') - ,('\283','e') - ,('\284','G') - ,('\285','g') - ,('\286','G') - ,('\287','g') - ,('\288','G') - ,('\289','g') - ,('\290','G') - ,('\291','g') - ,('\292','H') - ,('\293','h') - ,('\296','I') - ,('\297','i') - ,('\298','I') - ,('\299','i') - ,('\300','I') - ,('\301','i') - ,('\302','I') - ,('\303','i') - ,('\304','I') - ,('\308','J') - ,('\309','j') - ,('\310','K') - ,('\311','k') - ,('\313','L') - ,('\314','l') - ,('\315','L') - ,('\316','l') - ,('\317','L') - ,('\318','l') - ,('\323','N') - ,('\324','n') - ,('\325','N') - ,('\326','n') - ,('\327','N') - ,('\328','n') - ,('\332','O') - ,('\333','o') - ,('\334','O') - ,('\335','o') - ,('\336','O') - ,('\337','o') - ,('\340','R') - ,('\341','r') - ,('\342','R') - ,('\343','r') - ,('\344','R') - ,('\345','r') - ,('\346','S') - ,('\347','s') - ,('\348','S') - ,('\349','s') - ,('\350','S') - ,('\351','s') - ,('\352','S') - ,('\353','s') - ,('\354','T') - ,('\355','t') - ,('\356','T') - ,('\357','t') - ,('\360','U') - ,('\361','u') - ,('\362','U') - ,('\363','u') - ,('\364','U') - ,('\365','u') - ,('\366','U') - ,('\367','u') - ,('\368','U') - ,('\369','u') - ,('\370','U') - ,('\371','u') - ,('\372','W') - ,('\373','w') - ,('\374','Y') - ,('\375','y') - ,('\376','Y') - ,('\377','Z') - ,('\378','z') - ,('\379','Z') - ,('\380','z') - ,('\381','Z') - ,('\382','z') - ,('\416','O') - ,('\417','o') - ,('\431','U') - ,('\432','u') - ,('\461','A') - ,('\462','a') - ,('\463','I') - ,('\464','i') - ,('\465','O') - ,('\466','o') - ,('\467','U') - ,('\468','u') - ,('\486','G') - ,('\487','g') - ,('\488','K') - ,('\489','k') - ,('\490','O') - ,('\491','o') - ,('\496','j') - ,('\500','G') - ,('\501','g') - ,('\504','N') - ,('\505','n') - ,('\512','A') - ,('\513','a') - ,('\514','A') - ,('\515','a') - ,('\516','E') - ,('\517','e') - ,('\518','E') - ,('\519','e') - ,('\520','I') - ,('\521','i') - ,('\522','I') - ,('\523','i') - ,('\524','O') - ,('\525','o') - ,('\526','O') - ,('\527','o') - ,('\528','R') - ,('\529','r') - ,('\530','R') - ,('\531','r') - ,('\532','U') - ,('\533','u') - ,('\534','U') - ,('\535','u') - ,('\536','S') - ,('\537','s') - ,('\538','T') - ,('\539','t') - ,('\542','H') - ,('\543','h') - ,('\550','A') - ,('\551','a') - ,('\552','E') - ,('\553','e') - ,('\558','O') - ,('\559','o') - ,('\562','Y') - ,('\563','y') - ,('\894',';') - ,('\7680','A') - ,('\7681','a') - ,('\7682','B') - ,('\7683','b') - ,('\7684','B') - ,('\7685','b') - ,('\7686','B') - ,('\7687','b') - ,('\7690','D') - ,('\7691','d') - ,('\7692','D') - ,('\7693','d') - ,('\7694','D') - ,('\7695','d') - ,('\7696','D') - ,('\7697','d') - ,('\7698','D') - ,('\7699','d') - ,('\7704','E') - ,('\7705','e') - ,('\7706','E') - ,('\7707','e') - ,('\7710','F') - ,('\7711','f') - ,('\7712','G') - ,('\7713','g') - ,('\7714','H') - ,('\7715','h') - ,('\7716','H') - ,('\7717','h') - ,('\7718','H') - ,('\7719','h') - ,('\7720','H') - ,('\7721','h') - ,('\7722','H') - ,('\7723','h') - ,('\7724','I') - ,('\7725','i') - ,('\7728','K') - ,('\7729','k') - ,('\7730','K') - ,('\7731','k') - ,('\7732','K') - ,('\7733','k') - ,('\7734','L') - ,('\7735','l') - ,('\7738','L') - ,('\7739','l') - ,('\7740','L') - ,('\7741','l') - ,('\7742','M') - ,('\7743','m') - ,('\7744','M') - ,('\7745','m') - ,('\7746','M') - ,('\7747','m') - ,('\7748','N') - ,('\7749','n') - ,('\7750','N') - ,('\7751','n') - ,('\7752','N') - ,('\7753','n') - ,('\7754','N') - ,('\7755','n') - ,('\7764','P') - ,('\7765','p') - ,('\7766','P') - ,('\7767','p') - ,('\7768','R') - ,('\7769','r') - ,('\7770','R') - ,('\7771','r') - ,('\7774','R') - ,('\7775','r') - ,('\7776','S') - ,('\7777','s') - ,('\7778','S') - ,('\7779','s') - ,('\7786','T') - ,('\7787','t') - ,('\7788','T') - ,('\7789','t') - ,('\7790','T') - ,('\7791','t') - ,('\7792','T') - ,('\7793','t') - ,('\7794','U') - ,('\7795','u') - ,('\7796','U') - ,('\7797','u') - ,('\7798','U') - ,('\7799','u') - ,('\7804','V') - ,('\7805','v') - ,('\7806','V') - ,('\7807','v') - ,('\7808','W') - ,('\7809','w') - ,('\7810','W') - ,('\7811','w') - ,('\7812','W') - ,('\7813','w') - ,('\7814','W') - ,('\7815','w') - ,('\7816','W') - ,('\7817','w') - ,('\7818','X') - ,('\7819','x') - ,('\7820','X') - ,('\7821','x') - ,('\7822','Y') - ,('\7823','y') - ,('\7824','Z') - ,('\7825','z') - ,('\7826','Z') - ,('\7827','z') - ,('\7828','Z') - ,('\7829','z') - ,('\7830','h') - ,('\7831','t') - ,('\7832','w') - ,('\7833','y') - ,('\7840','A') - ,('\7841','a') - ,('\7842','A') - ,('\7843','a') - ,('\7864','E') - ,('\7865','e') - ,('\7866','E') - ,('\7867','e') - ,('\7868','E') - ,('\7869','e') - ,('\7880','I') - ,('\7881','i') - ,('\7882','I') - ,('\7883','i') - ,('\7884','O') - ,('\7885','o') - ,('\7886','O') - ,('\7887','o') - ,('\7908','U') - ,('\7909','u') - ,('\7910','U') - ,('\7911','u') - ,('\7922','Y') - ,('\7923','y') - ,('\7924','Y') - ,('\7925','y') - ,('\7926','Y') - ,('\7927','y') - ,('\7928','Y') - ,('\7929','y') - ,('\8175','`') - ,('\8490','K') - ,('\8800','=') - ,('\8814','<') - ,('\8815','>') - ] diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs deleted file mode 100644 index f479ed9d0..000000000 --- a/src/Text/Pandoc/CSS.hs +++ /dev/null @@ -1,43 +0,0 @@ -module Text.Pandoc.CSS ( foldOrElse - , pickStyleAttrProps - , pickStylesToKVs - ) -where - -import Text.Pandoc.Shared (trim) -import Text.Parsec -import Text.Parsec.String - -ruleParser :: Parser (String, String) -ruleParser = do - p <- many1 (noneOf ":") <* char ':' - v <- many1 (noneOf ":;") <* (optional $ char ';') <* spaces - return (trim p, trim v) - -styleAttrParser :: Parser [(String, String)] -styleAttrParser = many1 ruleParser - -orElse :: Eq a => a -> a -> a -> a -orElse v x y = if v == x then y else x - -foldOrElse :: Eq a => a -> [a] -> a -foldOrElse v xs = foldr (orElse v) v xs - -eitherToMaybe :: Either a b -> Maybe b -eitherToMaybe (Right x) = Just x -eitherToMaybe _ = Nothing - --- | takes a list of keys/properties and a CSS string and --- returns the corresponding key-value-pairs. -pickStylesToKVs :: [String] -> String -> [(String, String)] -pickStylesToKVs props styleAttr = - case parse styleAttrParser "" styleAttr of - Left _ -> [] - Right styles -> filter (\s -> fst s `elem` props) styles - --- | takes a list of key/property synonyms and a CSS string and maybe --- returns the value of the first match (in order of the supplied list) -pickStyleAttrProps :: [String] -> String -> Maybe String -pickStyleAttrProps lookupProps styleAttr = do - styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr - foldOrElse Nothing $ map (flip lookup styles) lookupProps diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs deleted file mode 100644 index fb148666c..000000000 --- a/src/Text/Pandoc/Class.hs +++ /dev/null @@ -1,539 +0,0 @@ -{-# LANGUAGE DeriveFunctor, DeriveDataTypeable, TypeSynonymInstances, -FlexibleInstances, GeneralizedNewtypeDeriving, FlexibleContexts #-} - -{- -Copyright (C) 2016 Jesse Rosenthal <jrosenthal@jhu.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Class - Copyright : Copyright (C) 2016 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable - -Typeclass for pandoc readers and writers, allowing both IO and pure instances. --} - -module Text.Pandoc.Class ( PandocMonad(..) - , CommonState(..) - , PureState(..) - , getPureState - , getsPureState - , putPureState - , modifyPureState - , getPOSIXTime - , getZonedTime - , readFileFromDirs - , report - , getLog - , setVerbosity - , getMediaBag - , setMediaBag - , insertMedia - , fetchItem - , getInputFiles - , getOutputFile - , PandocIO(..) - , PandocPure(..) - , FileTree(..) - , FileInfo(..) - , runIO - , runIOorExplode - , runPure - , withMediaBag - ) where - -import Prelude hiding (readFile) -import System.Random (StdGen, next, mkStdGen) -import qualified System.Random as IO (newStdGen) -import Codec.Archive.Zip (Archive, fromArchive, emptyArchive) -import Data.Unique (hashUnique) -import qualified Data.Unique as IO (newUnique) -import qualified Text.Pandoc.Shared as IO ( readDataFile - , openURL ) -import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Compat.Time (UTCTime) -import Text.Pandoc.Logging -import Text.Parsec (ParsecT) -import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime) -import Text.Pandoc.MIME (MimeType, getMimeType) -import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds - , posixSecondsToUTCTime - , POSIXTime ) -import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc) -import Network.URI ( escapeURIString, nonStrictRelativeTo, - unEscapeString, parseURIReference, isAllowedInURI, - parseURI, URI(..) ) -import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) -import qualified Text.Pandoc.MediaBag as MB -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import qualified System.Environment as IO (lookupEnv) -import System.FilePath.Glob (match, compile) -import System.FilePath ((</>), takeExtension, dropExtension) -import qualified System.FilePath.Glob as IO (glob) -import qualified System.Directory as IO (getModificationTime) -import Control.Monad as M (fail) -import Control.Monad.Reader (ReaderT) -import Control.Monad.State -import Control.Monad.Except -import Control.Monad.Writer (WriterT) -import Control.Monad.RWS (RWST) -import Data.Word (Word8) -import Data.Default -import System.IO.Error -import System.IO (stderr) -import qualified Data.Map as M -import Text.Pandoc.Error -import Text.Printf (printf) - -class (Functor m, Applicative m, Monad m, MonadError PandocError m) - => PandocMonad m where - lookupEnv :: String -> m (Maybe String) - getCurrentTime :: m UTCTime - getCurrentTimeZone :: m TimeZone - newStdGen :: m StdGen - newUniqueHash :: m Int - openURL :: String -> m (B.ByteString, Maybe MimeType) - readFileLazy :: FilePath -> m BL.ByteString - readFileStrict :: FilePath -> m B.ByteString - readDataFile :: Maybe FilePath - -> FilePath - -> m B.ByteString - glob :: String -> m [FilePath] - getModificationTime :: FilePath -> m UTCTime - getCommonState :: m CommonState - putCommonState :: CommonState -> m () - - getsCommonState :: (CommonState -> a) -> m a - getsCommonState f = f <$> getCommonState - - modifyCommonState :: (CommonState -> CommonState) -> m () - modifyCommonState f = getCommonState >>= putCommonState . f - - logOutput :: LogMessage -> m () - --- Functions defined for all PandocMonad instances - -setVerbosity :: PandocMonad m => Verbosity -> m () -setVerbosity verbosity = - modifyCommonState $ \st -> st{ stVerbosity = verbosity } - -getLog :: PandocMonad m => m [LogMessage] -getLog = reverse <$> getsCommonState stLog - -report :: PandocMonad m => LogMessage -> m () -report msg = do - verbosity <- getsCommonState stVerbosity - let level = messageVerbosity msg - when (level <= verbosity) $ do - logOutput msg - unless (level == DEBUG) $ - modifyCommonState $ \st -> st{ stLog = msg : stLog st } - -setMediaBag :: PandocMonad m => MediaBag -> m () -setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb} - -getMediaBag :: PandocMonad m => m MediaBag -getMediaBag = getsCommonState stMediaBag - -insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m () -insertMedia fp mime bs = do - mb <- getsCommonState stMediaBag - let mb' = MB.insertMedia fp mime bs mb - modifyCommonState $ \st -> st{stMediaBag = mb' } - -getInputFiles :: PandocMonad m => m (Maybe [FilePath]) -getInputFiles = getsCommonState stInputFiles - -getOutputFile :: PandocMonad m => m (Maybe FilePath) -getOutputFile = getsCommonState stOutputFile - -getPOSIXTime :: (PandocMonad m) => m POSIXTime -getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime - -getZonedTime :: (PandocMonad m) => m ZonedTime -getZonedTime = do - t <- getCurrentTime - tz <- getCurrentTimeZone - return $ utcToZonedTime tz t - --- | Read file, checking in any number of directories. -readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String) -readFileFromDirs [] _ = return Nothing -readFileFromDirs (d:ds) f = catchError - ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f)) - (\_ -> readFileFromDirs ds f) - --- - -data CommonState = CommonState { stLog :: [LogMessage] - , stMediaBag :: MediaBag - , stInputFiles :: Maybe [FilePath] - , stOutputFile :: Maybe FilePath - , stVerbosity :: Verbosity - } - -instance Default CommonState where - def = CommonState { stLog = [] - , stMediaBag = mempty - , stInputFiles = Nothing - , stOutputFile = Nothing - , stVerbosity = WARNING - } - -runIO :: PandocIO a -> IO (Either PandocError a) -runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma - -withMediaBag :: PandocMonad m => m a -> m (a, MediaBag) -withMediaBag ma = ((,)) <$> ma <*> getMediaBag - -runIOorExplode :: PandocIO a -> IO a -runIOorExplode ma = runIO ma >>= handleError - -newtype PandocIO a = PandocIO { - unPandocIO :: ExceptT PandocError (StateT CommonState IO) a - } deriving ( MonadIO - , Functor - , Applicative - , Monad - , MonadError PandocError - ) - -instance PandocMonad PandocIO where - lookupEnv = liftIO . IO.lookupEnv - getCurrentTime = liftIO IO.getCurrentTime - getCurrentTimeZone = liftIO IO.getCurrentTimeZone - newStdGen = liftIO IO.newStdGen - newUniqueHash = hashUnique <$> (liftIO IO.newUnique) - openURL u = do - eitherRes <- liftIO $ (tryIOError $ IO.openURL u) - case eitherRes of - Right (Right res) -> return res - Right (Left _) -> throwError $ PandocFileReadError u - Left _ -> throwError $ PandocFileReadError u - readFileLazy s = do - eitherBS <- liftIO (tryIOError $ BL.readFile s) - case eitherBS of - Right bs -> return bs - Left _ -> throwError $ PandocFileReadError s - readFileStrict s = do - eitherBS <- liftIO (tryIOError $ B.readFile s) - case eitherBS of - Right bs -> return bs - Left _ -> throwError $ PandocFileReadError s - -- TODO: Make this more sensitive to the different sorts of failure - readDataFile mfp fname = do - eitherBS <- liftIO (tryIOError $ IO.readDataFile mfp fname) - case eitherBS of - Right bs -> return bs - Left _ -> throwError $ PandocFileReadError fname - glob = liftIO . IO.glob - getModificationTime fp = do - eitherMtime <- liftIO (tryIOError $ IO.getModificationTime fp) - case eitherMtime of - Right mtime -> return mtime - Left _ -> throwError $ PandocFileReadError fp - getCommonState = PandocIO $ lift get - putCommonState x = PandocIO $ lift $ put x - logOutput msg = - liftIO $ UTF8.hPutStrLn stderr $ printf "%-7s %s" - (show (messageVerbosity msg)) (showLogMessage msg) - --- | Specialized version of parseURIReference that disallows --- single-letter schemes. Reason: these are usually windows absolute --- paths. -parseURIReference' :: String -> Maybe URI -parseURIReference' s = - case parseURIReference s of - Just u - | length (uriScheme u) > 2 -> Just u - | null (uriScheme u) -> Just u -- protocol-relative - _ -> Nothing - --- | Fetch an image or other item from the local filesystem or the net. --- Returns raw content and maybe mime type. -fetchItem :: PandocMonad m - => Maybe String - -> String - -> m (B.ByteString, Maybe MimeType) -fetchItem sourceURL s = do - mediabag <- getMediaBag - case lookupMedia s mediabag of - Just (mime, bs) -> return $ (BL.toStrict bs, Just mime) - Nothing -> downloadOrRead sourceURL s - -downloadOrRead :: PandocMonad m - => Maybe String - -> String - -> m (B.ByteString, Maybe MimeType) -downloadOrRead sourceURL s = do - case (sourceURL >>= parseURIReference' . - ensureEscaped, ensureEscaped s) of - (Just u, s') -> -- try fetching from relative path at source - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u - Nothing -> openURL s' -- will throw error - (Nothing, s'@('/':'/':_)) -> -- protocol-relative URI - case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon - Nothing -> openURL s' -- will throw error - (Nothing, s') -> - case parseURI s' of -- requires absolute URI - -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (show u') - Just u' | uriScheme u' == "file:" -> - readLocalFile $ dropWhile (=='/') (uriPath u') - _ -> readLocalFile fp -- get from local file system - where readLocalFile f = do - cont <- readFileStrict f - return (cont, mime) - httpcolon = URI{ uriScheme = "http:", - uriAuthority = Nothing, - uriPath = "", - uriQuery = "", - uriFragment = "" } - dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ dropFragmentAndQuery s - mime = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" - x -> getMimeType x - ensureEscaped = escapeURIString isAllowedInURI . map convertSlash - convertSlash '\\' = '/' - convertSlash x = x - -data PureState = PureState { stStdGen :: StdGen - , stWord8Store :: [Word8] -- should be - -- inifinite, - -- i.e. [1..] - , stUniqStore :: [Int] -- should be - -- inifinite and - -- contain every - -- element at most - -- once, e.g. [1..] - , stEnv :: [(String, String)] - , stTime :: UTCTime - , stTimeZone :: TimeZone - , stReferenceDocx :: Archive - , stReferenceODT :: Archive - , stFiles :: FileTree - , stUserDataDir :: FileTree - , stCabalDataDir :: FileTree - , stFontFiles :: [FilePath] - } - -instance Default PureState where - def = PureState { stStdGen = mkStdGen 1848 - , stWord8Store = [1..] - , stUniqStore = [1..] - , stEnv = [("USER", "pandoc-user")] - , stTime = posixSecondsToUTCTime 0 - , stTimeZone = utc - , stReferenceDocx = emptyArchive - , stReferenceODT = emptyArchive - , stFiles = mempty - , stUserDataDir = mempty - , stCabalDataDir = mempty - , stFontFiles = [] - } - - -getPureState :: PandocPure PureState -getPureState = PandocPure $ lift $ lift $ get - -getsPureState :: (PureState -> a) -> PandocPure a -getsPureState f = f <$> getPureState - -putPureState :: PureState -> PandocPure () -putPureState ps= PandocPure $ lift $ lift $ put ps - -modifyPureState :: (PureState -> PureState) -> PandocPure () -modifyPureState f = PandocPure $ lift $ lift $ modify f - - -data FileInfo = FileInfo { infoFileMTime :: UTCTime - , infoFileContents :: B.ByteString - } - -newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} - deriving (Monoid) - -getFileInfo :: FilePath -> FileTree -> Maybe FileInfo -getFileInfo fp tree = M.lookup fp $ unFileTree tree - - -newtype PandocPure a = PandocPure { - unPandocPure :: ExceptT PandocError - (StateT CommonState (State PureState)) a - } deriving ( Functor - , Applicative - , Monad - , MonadError PandocError - ) - -runPure :: PandocPure a -> Either PandocError a -runPure x = flip evalState def $ - flip evalStateT def $ - runExceptT $ - unPandocPure x - -instance PandocMonad PandocPure where - lookupEnv s = do - env <- getsPureState stEnv - return (lookup s env) - - getCurrentTime = getsPureState stTime - - getCurrentTimeZone = getsPureState stTimeZone - - newStdGen = do - g <- getsPureState stStdGen - let (_, nxtGen) = next g - modifyPureState $ \st -> st { stStdGen = nxtGen } - return g - - newUniqueHash = do - uniqs <- getsPureState stUniqStore - case uniqs of - u : us -> do - modifyPureState $ \st -> st { stUniqStore = us } - return u - _ -> M.fail "uniq store ran out of elements" - openURL _ = throwError $ PandocSomeError "Cannot open URL in PandocPure" - readFileLazy fp = do - fps <- getsPureState stFiles - case infoFileContents <$> getFileInfo fp fps of - Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocFileReadError fp - readFileStrict fp = do - fps <- getsPureState stFiles - case infoFileContents <$> getFileInfo fp fps of - Just bs -> return bs - Nothing -> throwError $ PandocFileReadError fp - readDataFile Nothing "reference.docx" = do - (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx - readDataFile Nothing "reference.odt" = do - (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT - readDataFile Nothing fname = do - let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname - readFileStrict fname' - readDataFile (Just userDir) fname = do - userDirFiles <- getsPureState stUserDataDir - case infoFileContents <$> (getFileInfo (userDir </> fname) userDirFiles) of - Just bs -> return bs - Nothing -> readDataFile Nothing fname - - glob s = do - fontFiles <- getsPureState stFontFiles - return (filter (match (compile s)) fontFiles) - - getModificationTime fp = do - fps <- getsPureState stFiles - case infoFileMTime <$> (getFileInfo fp fps) of - Just tm -> return tm - Nothing -> throwError $ PandocFileReadError fp - - getCommonState = PandocPure $ lift $ get - putCommonState x = PandocPure $ lift $ put x - - logOutput _msg = return () - -instance PandocMonad m => PandocMonad (ParsecT s st m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - readDataFile mbuserdir = lift . readDataFile mbuserdir - glob = lift . glob - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - -instance PandocMonad m => PandocMonad (ReaderT r m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - readDataFile mbuserdir = lift . readDataFile mbuserdir - glob = lift . glob - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - -instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - readDataFile mbuserdir = lift . readDataFile mbuserdir - glob = lift . glob - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - -instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - readDataFile mbuserdir = lift . readDataFile mbuserdir - glob = lift . glob - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - -instance PandocMonad m => PandocMonad (StateT st m) where - lookupEnv = lift . lookupEnv - getCurrentTime = lift getCurrentTime - getCurrentTimeZone = lift getCurrentTimeZone - newStdGen = lift newStdGen - newUniqueHash = lift newUniqueHash - openURL = lift . openURL - readFileLazy = lift . readFileLazy - readFileStrict = lift . readFileStrict - readDataFile mbuserdir = lift . readDataFile mbuserdir - glob = lift . glob - getModificationTime = lift . getModificationTime - getCommonState = lift getCommonState - putCommonState = lift . putCommonState - logOutput = lift . logOutput - diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs deleted file mode 100644 index b1cde82a4..000000000 --- a/src/Text/Pandoc/Compat/Time.hs +++ /dev/null @@ -1,30 +0,0 @@ -{-# LANGUAGE CPP #-} - -{- -This compatibility module is needed because, in time 1.5, the -`defaultTimeLocale` function was moved from System.Locale (in the -old-locale library) into Data.Time. - -We support both behaviors because time 1.4 is a boot library for GHC -7.8. time 1.5 is a boot library for GHC 7.10. - -When support is dropped for GHC 7.8, this module may be obsoleted. --} - -#if MIN_VERSION_time(1,5,0) -module Text.Pandoc.Compat.Time ( - module Data.Time -) -where -import Data.Time - -#else -module Text.Pandoc.Compat.Time ( - module Data.Time, - defaultTimeLocale -) -where -import Data.Time -import System.Locale ( defaultTimeLocale ) - -#endif diff --git a/src/Text/Pandoc/Data.hsb b/src/Text/Pandoc/Data.hsb deleted file mode 100644 index 8786647c5..000000000 --- a/src/Text/Pandoc/Data.hsb +++ /dev/null @@ -1,15 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} --- to be processed using hsb2hs -module Text.Pandoc.Data (dataFiles) where -import qualified Data.ByteString as B -import System.FilePath (splitDirectories) -import qualified System.FilePath.Posix as Posix - --- We ensure that the data files are stored using Posix --- path separators (/), even on Windows. -dataFiles :: [(FilePath, B.ByteString)] -dataFiles = map (\(fp, contents) -> - (Posix.joinPath (splitDirectories fp), contents)) dataFiles' - -dataFiles' :: [(FilePath, B.ByteString)] -dataFiles' = ("MANUAL.txt", %blob "MANUAL.txt") : %blobs "data" diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs deleted file mode 100644 index c9f368abc..000000000 --- a/src/Text/Pandoc/Emoji.hs +++ /dev/null @@ -1,906 +0,0 @@ -{- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Emoji - Copyright : Copyright (C) 2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Emoji symbol lookup from canonical string identifier. --} -module Text.Pandoc.Emoji ( emojis ) where -import qualified Data.Map as M - -emojis :: M.Map String String -emojis = M.fromList - [("+1","\128077") - ,("-1","\128078") - ,("100","\128175") - ,("1234","\128290") - ,("8ball","\127921") - ,("a","\127344\65039") - ,("ab","\127374") - ,("abc","\128292") - ,("abcd","\128289") - ,("accept","\127569") - ,("aerial_tramway","\128673") - ,("airplane","\9992\65039") - ,("alarm_clock","\9200") - ,("alien","\128125") - ,("ambulance","\128657") - ,("anchor","\9875") - ,("angel","\128124") - ,("anger","\128162") - ,("angry","\128544") - ,("anguished","\128551") - ,("ant","\128028") - ,("apple","\127822") - ,("aquarius","\9810") - ,("aries","\9800") - ,("arrow_backward","\9664\65039") - ,("arrow_double_down","\9196") - ,("arrow_double_up","\9195") - ,("arrow_down","\11015\65039") - ,("arrow_down_small","\128317") - ,("arrow_forward","\9654\65039") - ,("arrow_heading_down","\10549\65039") - ,("arrow_heading_up","\10548\65039") - ,("arrow_left","\11013\65039") - ,("arrow_lower_left","\8601\65039") - ,("arrow_lower_right","\8600\65039") - ,("arrow_right","\10145\65039") - ,("arrow_right_hook","\8618\65039") - ,("arrow_up","\11014\65039") - ,("arrow_up_down","\8597\65039") - ,("arrow_up_small","\128316") - ,("arrow_upper_left","\8598\65039") - ,("arrow_upper_right","\8599\65039") - ,("arrows_clockwise","\128259") - ,("arrows_counterclockwise","\128260") - ,("art","\127912") - ,("articulated_lorry","\128667") - ,("astonished","\128562") - ,("athletic_shoe","\128095") - ,("atm","\127975") - ,("b","\127345\65039") - ,("baby","\128118") - ,("baby_bottle","\127868") - ,("baby_chick","\128036") - ,("baby_symbol","\128700") - ,("back","\128281") - ,("baggage_claim","\128708") - ,("balloon","\127880") - ,("ballot_box_with_check","\9745\65039") - ,("bamboo","\127885") - ,("banana","\127820") - ,("bangbang","\8252\65039") - ,("bank","\127974") - ,("bar_chart","\128202") - ,("barber","\128136") - ,("baseball","\9918\65039") - ,("basketball","\127936") - ,("bath","\128704") - ,("bathtub","\128705") - ,("battery","\128267") - ,("bear","\128059") - ,("bee","\128029") - ,("beer","\127866") - ,("beers","\127867") - ,("beetle","\128030") - ,("beginner","\128304") - ,("bell","\128276") - ,("bento","\127857") - ,("bicyclist","\128692") - ,("bike","\128690") - ,("bikini","\128089") - ,("bird","\128038") - ,("birthday","\127874") - ,("black_circle","\9899") - ,("black_joker","\127183") - ,("black_large_square","\11035") - ,("black_medium_small_square","\9726") - ,("black_medium_square","\9724\65039") - ,("black_nib","\10002\65039") - ,("black_small_square","\9642\65039") - ,("black_square_button","\128306") - ,("blossom","\127804") - ,("blowfish","\128033") - ,("blue_book","\128216") - ,("blue_car","\128665") - ,("blue_heart","\128153") - ,("blush","\128522") - ,("boar","\128023") - ,("boat","\9973") - ,("bomb","\128163") - ,("book","\128214") - ,("bookmark","\128278") - ,("bookmark_tabs","\128209") - ,("books","\128218") - ,("boom","\128165") - ,("boot","\128098") - ,("bouquet","\128144") - ,("bow","\128583") - ,("bowling","\127923") - ,("boy","\128102") - ,("bread","\127838") - ,("bride_with_veil","\128112") - ,("bridge_at_night","\127753") - ,("briefcase","\128188") - ,("broken_heart","\128148") - ,("bug","\128027") - ,("bulb","\128161") - ,("bullettrain_front","\128645") - ,("bullettrain_side","\128644") - ,("bus","\128652") - ,("busstop","\128655") - ,("bust_in_silhouette","\128100") - ,("busts_in_silhouette","\128101") - ,("cactus","\127797") - ,("cake","\127856") - ,("calendar","\128198") - ,("calling","\128242") - ,("camel","\128043") - ,("camera","\128247") - ,("cancer","\9803") - ,("candy","\127852") - ,("capital_abcd","\128288") - ,("capricorn","\9809") - ,("car","\128663") - ,("card_index","\128199") - ,("carousel_horse","\127904") - ,("cat","\128049") - ,("cat2","\128008") - ,("cd","\128191") - ,("chart","\128185") - ,("chart_with_downwards_trend","\128201") - ,("chart_with_upwards_trend","\128200") - ,("checkered_flag","\127937") - ,("cherries","\127826") - ,("cherry_blossom","\127800") - ,("chestnut","\127792") - ,("chicken","\128020") - ,("children_crossing","\128696") - ,("chocolate_bar","\127851") - ,("christmas_tree","\127876") - ,("church","\9962") - ,("cinema","\127910") - ,("circus_tent","\127914") - ,("city_sunrise","\127751") - ,("city_sunset","\127750") - ,("cl","\127377") - ,("clap","\128079") - ,("clapper","\127916") - ,("clipboard","\128203") - ,("clock1","\128336") - ,("clock10","\128345") - ,("clock1030","\128357") - ,("clock11","\128346") - ,("clock1130","\128358") - ,("clock12","\128347") - ,("clock1230","\128359") - ,("clock130","\128348") - ,("clock2","\128337") - ,("clock230","\128349") - ,("clock3","\128338") - ,("clock330","\128350") - ,("clock4","\128339") - ,("clock430","\128351") - ,("clock5","\128340") - ,("clock530","\128352") - ,("clock6","\128341") - ,("clock630","\128353") - ,("clock7","\128342") - ,("clock730","\128354") - ,("clock8","\128343") - ,("clock830","\128355") - ,("clock9","\128344") - ,("clock930","\128356") - ,("closed_book","\128213") - ,("closed_lock_with_key","\128272") - ,("closed_umbrella","\127746") - ,("cloud","\9729\65039") - ,("clubs","\9827\65039") - ,("cn","\127464\127475") - ,("cocktail","\127864") - ,("coffee","\9749") - ,("cold_sweat","\128560") - ,("collision","\128165") - ,("computer","\128187") - ,("confetti_ball","\127882") - ,("confounded","\128534") - ,("confused","\128533") - ,("congratulations","\12951\65039") - ,("construction","\128679") - ,("construction_worker","\128119") - ,("convenience_store","\127978") - ,("cookie","\127850") - ,("cool","\127378") - ,("cop","\128110") - ,("copyright","\169\65039") - ,("corn","\127805") - ,("couple","\128107") - ,("couple_with_heart","\128145") - ,("couplekiss","\128143") - ,("cow","\128046") - ,("cow2","\128004") - ,("credit_card","\128179") - ,("crescent_moon","\127769") - ,("crocodile","\128010") - ,("crossed_flags","\127884") - ,("crown","\128081") - ,("cry","\128546") - ,("crying_cat_face","\128575") - ,("crystal_ball","\128302") - ,("cupid","\128152") - ,("curly_loop","\10160") - ,("currency_exchange","\128177") - ,("curry","\127835") - ,("custard","\127854") - ,("customs","\128707") - ,("cyclone","\127744") - ,("dancer","\128131") - ,("dancers","\128111") - ,("dango","\127841") - ,("dart","\127919") - ,("dash","\128168") - ,("date","\128197") - ,("de","\127465\127466") - ,("deciduous_tree","\127795") - ,("department_store","\127980") - ,("diamond_shape_with_a_dot_inside","\128160") - ,("diamonds","\9830\65039") - ,("disappointed","\128542") - ,("disappointed_relieved","\128549") - ,("dizzy","\128171") - ,("dizzy_face","\128565") - ,("do_not_litter","\128687") - ,("dog","\128054") - ,("dog2","\128021") - ,("dollar","\128181") - ,("dolls","\127886") - ,("dolphin","\128044") - ,("door","\128682") - ,("doughnut","\127849") - ,("dragon","\128009") - ,("dragon_face","\128050") - ,("dress","\128087") - ,("dromedary_camel","\128042") - ,("droplet","\128167") - ,("dvd","\128192") - ,("e-mail","\128231") - ,("ear","\128066") - ,("ear_of_rice","\127806") - ,("earth_africa","\127757") - ,("earth_americas","\127758") - ,("earth_asia","\127759") - ,("egg","\127859") - ,("eggplant","\127814") - ,("eight","8\65039\8419") - ,("eight_pointed_black_star","\10036\65039") - ,("eight_spoked_asterisk","\10035\65039") - ,("electric_plug","\128268") - ,("elephant","\128024") - ,("email","\9993\65039") - ,("end","\128282") - ,("envelope","\9993\65039") - ,("envelope_with_arrow","\128233") - ,("es","\127466\127480") - ,("euro","\128182") - ,("european_castle","\127984") - ,("european_post_office","\127972") - ,("evergreen_tree","\127794") - ,("exclamation","\10071") - ,("expressionless","\128529") - ,("eyeglasses","\128083") - ,("eyes","\128064") - ,("facepunch","\128074") - ,("factory","\127981") - ,("fallen_leaf","\127810") - ,("family","\128106") - ,("fast_forward","\9193") - ,("fax","\128224") - ,("fearful","\128552") - ,("feet","\128062") - ,("ferris_wheel","\127905") - ,("file_folder","\128193") - ,("fire","\128293") - ,("fire_engine","\128658") - ,("fireworks","\127878") - ,("first_quarter_moon","\127763") - ,("first_quarter_moon_with_face","\127771") - ,("fish","\128031") - ,("fish_cake","\127845") - ,("fishing_pole_and_fish","\127907") - ,("fist","\9994") - ,("five","5\65039\8419") - ,("flags","\127887") - ,("flashlight","\128294") - ,("flipper","\128044") - ,("floppy_disk","\128190") - ,("flower_playing_cards","\127924") - ,("flushed","\128563") - ,("foggy","\127745") - ,("football","\127944") - ,("footprints","\128099") - ,("fork_and_knife","\127860") - ,("fountain","\9970") - ,("four","4\65039\8419") - ,("four_leaf_clover","\127808") - ,("fr","\127467\127479") - ,("free","\127379") - ,("fried_shrimp","\127844") - ,("fries","\127839") - ,("frog","\128056") - ,("frowning","\128550") - ,("fuelpump","\9981") - ,("full_moon","\127765") - ,("full_moon_with_face","\127773") - ,("game_die","\127922") - ,("gb","\127468\127463") - ,("gem","\128142") - ,("gemini","\9802") - ,("ghost","\128123") - ,("gift","\127873") - ,("gift_heart","\128157") - ,("girl","\128103") - ,("globe_with_meridians","\127760") - ,("goat","\128016") - ,("golf","\9971") - ,("grapes","\127815") - ,("green_apple","\127823") - ,("green_book","\128215") - ,("green_heart","\128154") - ,("grey_exclamation","\10069") - ,("grey_question","\10068") - ,("grimacing","\128556") - ,("grin","\128513") - ,("grinning","\128512") - ,("guardsman","\128130") - ,("guitar","\127928") - ,("gun","\128299") - ,("haircut","\128135") - ,("hamburger","\127828") - ,("hammer","\128296") - ,("hamster","\128057") - ,("hand","\9995") - ,("handbag","\128092") - ,("hankey","\128169") - ,("hash","#\65039\8419") - ,("hatched_chick","\128037") - ,("hatching_chick","\128035") - ,("headphones","\127911") - ,("hear_no_evil","\128585") - ,("heart","\10084\65039") - ,("heart_decoration","\128159") - ,("heart_eyes","\128525") - ,("heart_eyes_cat","\128571") - ,("heartbeat","\128147") - ,("heartpulse","\128151") - ,("hearts","\9829\65039") - ,("heavy_check_mark","\10004\65039") - ,("heavy_division_sign","\10135") - ,("heavy_dollar_sign","\128178") - ,("heavy_exclamation_mark","\10071") - ,("heavy_minus_sign","\10134") - ,("heavy_multiplication_x","\10006\65039") - ,("heavy_plus_sign","\10133") - ,("helicopter","\128641") - ,("herb","\127807") - ,("hibiscus","\127802") - ,("high_brightness","\128262") - ,("high_heel","\128096") - ,("hocho","\128298") - ,("honey_pot","\127855") - ,("honeybee","\128029") - ,("horse","\128052") - ,("horse_racing","\127943") - ,("hospital","\127973") - ,("hotel","\127976") - ,("hotsprings","\9832\65039") - ,("hourglass","\8987") - ,("hourglass_flowing_sand","\9203") - ,("house","\127968") - ,("house_with_garden","\127969") - ,("hushed","\128559") - ,("ice_cream","\127848") - ,("icecream","\127846") - ,("id","\127380") - ,("ideograph_advantage","\127568") - ,("imp","\128127") - ,("inbox_tray","\128229") - ,("incoming_envelope","\128232") - ,("information_desk_person","\128129") - ,("information_source","\8505\65039") - ,("innocent","\128519") - ,("interrobang","\8265\65039") - ,("iphone","\128241") - ,("it","\127470\127481") - ,("izakaya_lantern","\127982") - ,("jack_o_lantern","\127875") - ,("japan","\128510") - ,("japanese_castle","\127983") - ,("japanese_goblin","\128122") - ,("japanese_ogre","\128121") - ,("jeans","\128086") - ,("joy","\128514") - ,("joy_cat","\128569") - ,("jp","\127471\127477") - ,("key","\128273") - ,("keycap_ten","\128287") - ,("kimono","\128088") - ,("kiss","\128139") - ,("kissing","\128535") - ,("kissing_cat","\128573") - ,("kissing_closed_eyes","\128538") - ,("kissing_heart","\128536") - ,("kissing_smiling_eyes","\128537") - ,("knife","\128298") - ,("koala","\128040") - ,("koko","\127489") - ,("kr","\127472\127479") - ,("lantern","\127982") - ,("large_blue_circle","\128309") - ,("large_blue_diamond","\128311") - ,("large_orange_diamond","\128310") - ,("last_quarter_moon","\127767") - ,("last_quarter_moon_with_face","\127772") - ,("laughing","\128518") - ,("leaves","\127811") - ,("ledger","\128210") - ,("left_luggage","\128709") - ,("left_right_arrow","\8596\65039") - ,("leftwards_arrow_with_hook","\8617\65039") - ,("lemon","\127819") - ,("leo","\9804") - ,("leopard","\128006") - ,("libra","\9806") - ,("light_rail","\128648") - ,("link","\128279") - ,("lips","\128068") - ,("lipstick","\128132") - ,("lock","\128274") - ,("lock_with_ink_pen","\128271") - ,("lollipop","\127853") - ,("loop","\10175") - ,("loud_sound","\128266") - ,("loudspeaker","\128226") - ,("love_hotel","\127977") - ,("love_letter","\128140") - ,("low_brightness","\128261") - ,("m","\9410\65039") - ,("mag","\128269") - ,("mag_right","\128270") - ,("mahjong","\126980") - ,("mailbox","\128235") - ,("mailbox_closed","\128234") - ,("mailbox_with_mail","\128236") - ,("mailbox_with_no_mail","\128237") - ,("man","\128104") - ,("man_with_gua_pi_mao","\128114") - ,("man_with_turban","\128115") - ,("mans_shoe","\128094") - ,("maple_leaf","\127809") - ,("mask","\128567") - ,("massage","\128134") - ,("meat_on_bone","\127830") - ,("mega","\128227") - ,("melon","\127816") - ,("memo","\128221") - ,("mens","\128697") - ,("metro","\128647") - ,("microphone","\127908") - ,("microscope","\128300") - ,("milky_way","\127756") - ,("minibus","\128656") - ,("minidisc","\128189") - ,("mobile_phone_off","\128244") - ,("money_with_wings","\128184") - ,("moneybag","\128176") - ,("monkey","\128018") - ,("monkey_face","\128053") - ,("monorail","\128669") - ,("moon","\127764") - ,("mortar_board","\127891") - ,("mount_fuji","\128507") - ,("mountain_bicyclist","\128693") - ,("mountain_cableway","\128672") - ,("mountain_railway","\128670") - ,("mouse","\128045") - ,("mouse2","\128001") - ,("movie_camera","\127909") - ,("moyai","\128511") - ,("muscle","\128170") - ,("mushroom","\127812") - ,("musical_keyboard","\127929") - ,("musical_note","\127925") - ,("musical_score","\127932") - ,("mute","\128263") - ,("nail_care","\128133") - ,("name_badge","\128219") - ,("necktie","\128084") - ,("negative_squared_cross_mark","\10062") - ,("neutral_face","\128528") - ,("new","\127381") - ,("new_moon","\127761") - ,("new_moon_with_face","\127770") - ,("newspaper","\128240") - ,("ng","\127382") - ,("night_with_stars","\127747") - ,("nine","9\65039\8419") - ,("no_bell","\128277") - ,("no_bicycles","\128691") - ,("no_entry","\9940") - ,("no_entry_sign","\128683") - ,("no_good","\128581") - ,("no_mobile_phones","\128245") - ,("no_mouth","\128566") - ,("no_pedestrians","\128695") - ,("no_smoking","\128685") - ,("non-potable_water","\128689") - ,("nose","\128067") - ,("notebook","\128211") - ,("notebook_with_decorative_cover","\128212") - ,("notes","\127926") - ,("nut_and_bolt","\128297") - ,("o","\11093") - ,("o2","\127358\65039") - ,("ocean","\127754") - ,("octopus","\128025") - ,("oden","\127842") - ,("office","\127970") - ,("ok","\127383") - ,("ok_hand","\128076") - ,("ok_woman","\128582") - ,("older_man","\128116") - ,("older_woman","\128117") - ,("on","\128283") - ,("oncoming_automobile","\128664") - ,("oncoming_bus","\128653") - ,("oncoming_police_car","\128660") - ,("oncoming_taxi","\128662") - ,("one","1\65039\8419") - ,("open_book","\128214") - ,("open_file_folder","\128194") - ,("open_hands","\128080") - ,("open_mouth","\128558") - ,("ophiuchus","\9934") - ,("orange_book","\128217") - ,("outbox_tray","\128228") - ,("ox","\128002") - ,("package","\128230") - ,("page_facing_up","\128196") - ,("page_with_curl","\128195") - ,("pager","\128223") - ,("palm_tree","\127796") - ,("panda_face","\128060") - ,("paperclip","\128206") - ,("parking","\127359\65039") - ,("part_alternation_mark","\12349\65039") - ,("partly_sunny","\9925") - ,("passport_control","\128706") - ,("paw_prints","\128062") - ,("peach","\127825") - ,("pear","\127824") - ,("pencil","\128221") - ,("pencil2","\9999\65039") - ,("penguin","\128039") - ,("pensive","\128532") - ,("performing_arts","\127917") - ,("persevere","\128547") - ,("person_frowning","\128589") - ,("person_with_blond_hair","\128113") - ,("person_with_pouting_face","\128590") - ,("phone","\9742\65039") - ,("pig","\128055") - ,("pig2","\128022") - ,("pig_nose","\128061") - ,("pill","\128138") - ,("pineapple","\127821") - ,("pisces","\9811") - ,("pizza","\127829") - ,("point_down","\128071") - ,("point_left","\128072") - ,("point_right","\128073") - ,("point_up","\9757\65039") - ,("point_up_2","\128070") - ,("police_car","\128659") - ,("poodle","\128041") - ,("poop","\128169") - ,("post_office","\127971") - ,("postal_horn","\128239") - ,("postbox","\128238") - ,("potable_water","\128688") - ,("pouch","\128093") - ,("poultry_leg","\127831") - ,("pound","\128183") - ,("pouting_cat","\128574") - ,("pray","\128591") - ,("princess","\128120") - ,("punch","\128074") - ,("purple_heart","\128156") - ,("purse","\128091") - ,("pushpin","\128204") - ,("put_litter_in_its_place","\128686") - ,("question","\10067") - ,("rabbit","\128048") - ,("rabbit2","\128007") - ,("racehorse","\128014") - ,("radio","\128251") - ,("radio_button","\128280") - ,("rage","\128545") - ,("railway_car","\128643") - ,("rainbow","\127752") - ,("raised_hand","\9995") - ,("raised_hands","\128588") - ,("raising_hand","\128587") - ,("ram","\128015") - ,("ramen","\127836") - ,("rat","\128000") - ,("recycle","\9851\65039") - ,("red_car","\128663") - ,("red_circle","\128308") - ,("registered","\174\65039") - ,("relaxed","\9786\65039") - ,("relieved","\128524") - ,("repeat","\128257") - ,("repeat_one","\128258") - ,("restroom","\128699") - ,("revolving_hearts","\128158") - ,("rewind","\9194") - ,("ribbon","\127872") - ,("rice","\127834") - ,("rice_ball","\127833") - ,("rice_cracker","\127832") - ,("rice_scene","\127889") - ,("ring","\128141") - ,("rocket","\128640") - ,("roller_coaster","\127906") - ,("rooster","\128019") - ,("rose","\127801") - ,("rotating_light","\128680") - ,("round_pushpin","\128205") - ,("rowboat","\128675") - ,("ru","\127479\127482") - ,("rugby_football","\127945") - ,("runner","\127939") - ,("running","\127939") - ,("running_shirt_with_sash","\127933") - ,("sa","\127490\65039") - ,("sagittarius","\9808") - ,("sailboat","\9973") - ,("sake","\127862") - ,("sandal","\128097") - ,("santa","\127877") - ,("satellite","\128225") - ,("satisfied","\128518") - ,("saxophone","\127927") - ,("school","\127979") - ,("school_satchel","\127890") - ,("scissors","\9986\65039") - ,("scorpius","\9807") - ,("scream","\128561") - ,("scream_cat","\128576") - ,("scroll","\128220") - ,("seat","\128186") - ,("secret","\12953\65039") - ,("see_no_evil","\128584") - ,("seedling","\127793") - ,("seven","7\65039\8419") - ,("shaved_ice","\127847") - ,("sheep","\128017") - ,("shell","\128026") - ,("ship","\128674") - ,("shirt","\128085") - ,("shit","\128169") - ,("shoe","\128094") - ,("shower","\128703") - ,("signal_strength","\128246") - ,("six","6\65039\8419") - ,("six_pointed_star","\128303") - ,("ski","\127935") - ,("skull","\128128") - ,("sleeping","\128564") - ,("sleepy","\128554") - ,("slot_machine","\127920") - ,("small_blue_diamond","\128313") - ,("small_orange_diamond","\128312") - ,("small_red_triangle","\128314") - ,("small_red_triangle_down","\128315") - ,("smile","\128516") - ,("smile_cat","\128568") - ,("smiley","\128515") - ,("smiley_cat","\128570") - ,("smiling_imp","\128520") - ,("smirk","\128527") - ,("smirk_cat","\128572") - ,("smoking","\128684") - ,("snail","\128012") - ,("snake","\128013") - ,("snowboarder","\127938") - ,("snowflake","\10052\65039") - ,("snowman","\9924") - ,("sob","\128557") - ,("soccer","\9917") - ,("soon","\128284") - ,("sos","\127384") - ,("sound","\128265") - ,("space_invader","\128126") - ,("spades","\9824\65039") - ,("spaghetti","\127837") - ,("sparkle","\10055\65039") - ,("sparkler","\127879") - ,("sparkles","\10024") - ,("sparkling_heart","\128150") - ,("speak_no_evil","\128586") - ,("speaker","\128264") - ,("speech_balloon","\128172") - ,("speedboat","\128676") - ,("star","\11088") - ,("star2","\127775") - ,("stars","\127776") - ,("station","\128649") - ,("statue_of_liberty","\128509") - ,("steam_locomotive","\128642") - ,("stew","\127858") - ,("straight_ruler","\128207") - ,("strawberry","\127827") - ,("stuck_out_tongue","\128539") - ,("stuck_out_tongue_closed_eyes","\128541") - ,("stuck_out_tongue_winking_eye","\128540") - ,("sun_with_face","\127774") - ,("sunflower","\127803") - ,("sunglasses","\128526") - ,("sunny","\9728\65039") - ,("sunrise","\127749") - ,("sunrise_over_mountains","\127748") - ,("surfer","\127940") - ,("sushi","\127843") - ,("suspension_railway","\128671") - ,("sweat","\128531") - ,("sweat_drops","\128166") - ,("sweat_smile","\128517") - ,("sweet_potato","\127840") - ,("swimmer","\127946") - ,("symbols","\128291") - ,("syringe","\128137") - ,("tada","\127881") - ,("tanabata_tree","\127883") - ,("tangerine","\127818") - ,("taurus","\9801") - ,("taxi","\128661") - ,("tea","\127861") - ,("telephone","\9742\65039") - ,("telephone_receiver","\128222") - ,("telescope","\128301") - ,("tennis","\127934") - ,("tent","\9978") - ,("thought_balloon","\128173") - ,("three","3\65039\8419") - ,("thumbsdown","\128078") - ,("thumbsup","\128077") - ,("ticket","\127915") - ,("tiger","\128047") - ,("tiger2","\128005") - ,("tired_face","\128555") - ,("tm","\8482\65039") - ,("toilet","\128701") - ,("tokyo_tower","\128508") - ,("tomato","\127813") - ,("tongue","\128069") - ,("top","\128285") - ,("tophat","\127913") - ,("tractor","\128668") - ,("traffic_light","\128677") - ,("train","\128651") - ,("train2","\128646") - ,("tram","\128650") - ,("triangular_flag_on_post","\128681") - ,("triangular_ruler","\128208") - ,("trident","\128305") - ,("triumph","\128548") - ,("trolleybus","\128654") - ,("trophy","\127942") - ,("tropical_drink","\127865") - ,("tropical_fish","\128032") - ,("truck","\128666") - ,("trumpet","\127930") - ,("tshirt","\128085") - ,("tulip","\127799") - ,("turtle","\128034") - ,("tv","\128250") - ,("twisted_rightwards_arrows","\128256") - ,("two","2\65039\8419") - ,("two_hearts","\128149") - ,("two_men_holding_hands","\128108") - ,("two_women_holding_hands","\128109") - ,("u5272","\127545") - ,("u5408","\127540") - ,("u55b6","\127546") - ,("u6307","\127535") - ,("u6708","\127543\65039") - ,("u6709","\127542") - ,("u6e80","\127541") - ,("u7121","\127514") - ,("u7533","\127544") - ,("u7981","\127538") - ,("u7a7a","\127539") - ,("uk","\127468\127463") - ,("umbrella","\9748") - ,("unamused","\128530") - ,("underage","\128286") - ,("unlock","\128275") - ,("up","\127385") - ,("us","\127482\127480") - ,("v","\9996\65039") - ,("vertical_traffic_light","\128678") - ,("vhs","\128252") - ,("vibration_mode","\128243") - ,("video_camera","\128249") - ,("video_game","\127918") - ,("violin","\127931") - ,("virgo","\9805") - ,("volcano","\127755") - ,("vs","\127386") - ,("walking","\128694") - ,("waning_crescent_moon","\127768") - ,("waning_gibbous_moon","\127766") - ,("warning","\9888\65039") - ,("watch","\8986") - ,("water_buffalo","\128003") - ,("watermelon","\127817") - ,("wave","\128075") - ,("wavy_dash","\12336\65039") - ,("waxing_crescent_moon","\127762") - ,("waxing_gibbous_moon","\127764") - ,("wc","\128702") - ,("weary","\128553") - ,("wedding","\128146") - ,("whale","\128051") - ,("whale2","\128011") - ,("wheelchair","\9855") - ,("white_check_mark","\9989") - ,("white_circle","\9898") - ,("white_flower","\128174") - ,("white_large_square","\11036") - ,("white_medium_small_square","\9725") - ,("white_medium_square","\9723\65039") - ,("white_small_square","\9643\65039") - ,("white_square_button","\128307") - ,("wind_chime","\127888") - ,("wine_glass","\127863") - ,("wink","\128521") - ,("wolf","\128058") - ,("woman","\128105") - ,("womans_clothes","\128090") - ,("womans_hat","\128082") - ,("womens","\128698") - ,("worried","\128543") - ,("wrench","\128295") - ,("x","\10060") - ,("yellow_heart","\128155") - ,("yen","\128180") - ,("yum","\128523") - ,("zap","\9889") - ,("zero","0\65039\8419") - ,("zzz","\128164") - ] - diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs deleted file mode 100644 index 65f912c88..000000000 --- a/src/Text/Pandoc/Error.hs +++ /dev/null @@ -1,76 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} -{- | - Module : Text.Pandoc.Error - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -This module provides a standard way to deal with possible errors encounted -during parsing. - --} -module Text.Pandoc.Error ( - PandocError(..), - handleError) where - -import Text.Parsec.Error -import Text.Parsec.Pos hiding (Line) -import Data.Generics (Typeable) -import GHC.Generics (Generic) -import Control.Exception (Exception) -import Text.Pandoc.Shared (err) - -type Input = String - -data PandocError = PandocFileReadError FilePath - | PandocShouldNeverHappenError String - | PandocSomeError String - | PandocParseError String - | PandocParsecError Input ParseError - | PandocMakePDFError String - deriving (Show, Typeable, Generic) - -instance Exception PandocError - --- | Handle PandocError by exiting with an error message. -handleError :: Either PandocError a -> IO a -handleError (Right r) = return r -handleError (Left e) = - case e of - PandocFileReadError fp -> err 61 $ "problem reading " ++ fp - PandocShouldNeverHappenError s -> err 62 s - PandocSomeError s -> err 63 s - PandocParseError s -> err 64 s - PandocParsecError input err' -> - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - ls = lines input ++ [""] - errorInFile = if length ls > errLine - 1 - then concat ["\n", (ls !! (errLine - 1)) - ,"\n", replicate (errColumn - 1) ' ' - ,"^"] - else "" - in err 65 $ "\nError at " ++ show err' ++ errorInFile - PandocMakePDFError s -> err 65 s - diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs deleted file mode 100644 index d5e59e8e1..000000000 --- a/src/Text/Pandoc/Extensions.hs +++ /dev/null @@ -1,267 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} -{- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Extensions - Copyright : Copyright (C) 2012-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Data structures and functions for representing markup extensions. --} -module Text.Pandoc.Extensions ( Extension(..) - , Extensions - , emptyExtensions - , extensionsFromList - , extensionEnabled - , enableExtension - , disableExtension - , pandocExtensions - , plainExtensions - , strictExtensions - , phpMarkdownExtraExtensions - , githubMarkdownExtensions - , multimarkdownExtensions ) -where -import Data.Bits (testBit, setBit, clearBit) -import Data.Data (Data) -import Data.Typeable (Typeable) -import GHC.Generics (Generic) - -newtype Extensions = Extensions Integer - deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) - -extensionsFromList :: [Extension] -> Extensions -extensionsFromList = foldr enableExtension emptyExtensions - -emptyExtensions :: Extensions -emptyExtensions = Extensions 0 - -extensionEnabled :: Extension -> Extensions -> Bool -extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) - -enableExtension :: Extension -> Extensions -> Extensions -enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) - -disableExtension :: Extension -> Extensions -> Extensions -disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) - --- | Individually selectable syntax extensions. -data Extension = - Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes - | Ext_inline_notes -- ^ Pandoc-style inline notes - | Ext_pandoc_title_block -- ^ Pandoc title block - | Ext_yaml_metadata_block -- ^ YAML metadata block - | Ext_mmd_title_block -- ^ Multimarkdown metadata block - | Ext_table_captions -- ^ Pandoc-style table captions - | Ext_implicit_figures -- ^ A paragraph with just an image is a figure - | Ext_simple_tables -- ^ Pandoc-style simple tables - | Ext_multiline_tables -- ^ Pandoc-style multiline tables - | Ext_grid_tables -- ^ Grid tables (pandoc, reST) - | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) - | Ext_citations -- ^ Pandoc/citeproc citations - | Ext_raw_tex -- ^ Allow raw TeX (other than math) - | Ext_raw_html -- ^ Allow raw HTML - | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ - | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] - | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] - | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) - | Ext_fenced_code_blocks -- ^ Parse fenced code blocks - | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks - | Ext_inline_code_attributes -- ^ Allow attributes on inline code - | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks - | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags - | Ext_native_spans -- ^ Use Span inlines for contents of <span> - | Ext_bracketed_spans -- ^ Bracketed spans with attributes - | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown - -- iff container has attribute 'markdown' - | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak - | Ext_link_attributes -- ^ link and image attributes - | Ext_mmd_link_attributes -- ^ MMD style reference link attributes - | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links - | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters - | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank - | Ext_startnum -- ^ Make start number of ordered list significant - | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php - | Ext_compact_definition_lists -- ^ Definition lists without - -- space between items, and disallow laziness - | Ext_example_lists -- ^ Markdown-style numbered examples - | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable - | Ext_angle_brackets_escapable -- ^ Make < and > escapable - | Ext_intraword_underscores -- ^ Treat underscore inside word as literal - | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote - | Ext_blank_before_header -- ^ Require blank line before a header - | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax - | Ext_superscript -- ^ Superscript using ^this^ syntax - | Ext_subscript -- ^ Subscript using ~this~ syntax - | Ext_hard_line_breaks -- ^ All newlines become hard line breaks - | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored - | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between - -- East Asian wide characters - | Ext_literate_haskell -- ^ Enable literate Haskell conventions - | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions - | Ext_emoji -- ^ Support emoji like :smile: - | Ext_auto_identifiers -- ^ Automatic identifiers for headers - | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers - | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} - | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] - | Ext_implicit_header_references -- ^ Implicit reference links for headers - | Ext_line_blocks -- ^ RST style line blocks - | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - | Ext_shortcut_reference_links -- ^ Shortcut reference links - | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes - | Ext_old_dashes -- ^ -- = em, - before number = en - deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) - -pandocExtensions :: Extensions -pandocExtensions = extensionsFromList - [ Ext_footnotes - , Ext_inline_notes - , Ext_pandoc_title_block - , Ext_yaml_metadata_block - , Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_pipe_tables - , Ext_citations - , Ext_raw_tex - , Ext_raw_html - , Ext_tex_math_dollars - , Ext_latex_macros - , Ext_fenced_code_blocks - , Ext_fenced_code_attributes - , Ext_backtick_code_blocks - , Ext_inline_code_attributes - , Ext_markdown_in_html_blocks - , Ext_native_divs - , Ext_native_spans - , Ext_bracketed_spans - , Ext_escaped_line_breaks - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_all_symbols_escapable - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - , Ext_superscript - , Ext_subscript - , Ext_auto_identifiers - , Ext_header_attributes - , Ext_link_attributes - , Ext_implicit_header_references - , Ext_line_blocks - , Ext_shortcut_reference_links - , Ext_smart - ] - -plainExtensions :: Extensions -plainExtensions = extensionsFromList - [ Ext_table_captions - , Ext_implicit_figures - , Ext_simple_tables - , Ext_multiline_tables - , Ext_grid_tables - , Ext_latex_macros - , Ext_fancy_lists - , Ext_startnum - , Ext_definition_lists - , Ext_example_lists - , Ext_intraword_underscores - , Ext_blank_before_blockquote - , Ext_blank_before_header - , Ext_strikeout - ] - -phpMarkdownExtraExtensions :: Extensions -phpMarkdownExtraExtensions = extensionsFromList - [ Ext_footnotes - , Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_fenced_code_blocks - , Ext_definition_lists - , Ext_intraword_underscores - , Ext_header_attributes - , Ext_link_attributes - , Ext_abbreviations - , Ext_shortcut_reference_links - ] - -githubMarkdownExtensions :: Extensions -githubMarkdownExtensions = extensionsFromList - [ Ext_angle_brackets_escapable - , Ext_pipe_tables - , Ext_raw_html - , Ext_fenced_code_blocks - , Ext_auto_identifiers - , Ext_ascii_identifiers - , Ext_backtick_code_blocks - , Ext_autolink_bare_uris - , Ext_intraword_underscores - , Ext_strikeout - , Ext_hard_line_breaks - , Ext_emoji - , Ext_lists_without_preceding_blankline - , Ext_shortcut_reference_links - ] - -multimarkdownExtensions :: Extensions -multimarkdownExtensions = extensionsFromList - [ Ext_pipe_tables - , Ext_raw_html - , Ext_markdown_attribute - , Ext_mmd_link_attributes - -- , Ext_raw_tex - -- Note: MMD's raw TeX syntax requires raw TeX to be - -- enclosed in HTML comment - , Ext_tex_math_double_backslash - , Ext_intraword_underscores - , Ext_mmd_title_block - , Ext_footnotes - , Ext_definition_lists - , Ext_all_symbols_escapable - , Ext_implicit_header_references - , Ext_auto_identifiers - , Ext_mmd_header_identifiers - , Ext_implicit_figures - -- Note: MMD's syntax for superscripts and subscripts - -- is a bit more permissive than pandoc's, allowing - -- e^2 and a~1 instead of e^2^ and a~1~, so even with - -- these options we don't have full support for MMD - -- superscripts and subscripts, but there's no reason - -- not to include these: - , Ext_superscript - , Ext_subscript - ] - -strictExtensions :: Extensions -strictExtensions = extensionsFromList - [ Ext_raw_html - , Ext_shortcut_reference_links - ] - diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs deleted file mode 100644 index df060915c..000000000 --- a/src/Text/Pandoc/Highlighting.hs +++ /dev/null @@ -1,223 +0,0 @@ -{- -Copyright (C) 2008-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Exports functions for syntax highlighting. --} - -module Text.Pandoc.Highlighting ( highlightingStyles - , languages - , languagesByExtension - , highlight - , formatLaTeXInline - , formatLaTeXBlock - , styleToLaTeX - , formatHtmlInline - , formatHtmlBlock - , styleToCss - , pygments - , espresso - , zenburn - , tango - , kate - , monochrome - , haddock - , Style - , fromListingsLanguage - , toListingsLanguage - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared (safeRead) -import Skylighting -import Data.Maybe (fromMaybe) -import Data.Char (toLower) -import qualified Data.Map as M -import Control.Monad -import qualified Data.Text as T - -highlightingStyles :: [(String, Style)] -highlightingStyles = - [("pygments", pygments), - ("tango", tango), - ("espresso", espresso), - ("zenburn", zenburn), - ("kate", kate), - ("monochrome", monochrome), - ("breezedark", breezeDark), - ("haddock", haddock)] - -languages :: [String] -languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] - -languagesByExtension :: String -> [String] -languagesByExtension ext = - [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext] - -highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter - -> Attr -- ^ Attributes of the CodeBlock - -> String -- ^ Raw contents of the CodeBlock - -> Maybe a -- ^ Maybe the formatted result -highlight formatter (_, classes, keyvals) rawCode = - let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals)) - fmtOpts = defaultFormatOpts{ - startNumber = firstNum, - numberLines = any (`elem` - ["number","numberLines", "number-lines"]) classes } - tokenizeOpts = TokenizerConfig{ syntaxMap = defaultSyntaxMap - , traceOutput = False } - classes' = map T.pack classes - rawCode' = T.pack rawCode - in case msum (map (\l -> lookupSyntax l defaultSyntaxMap) classes') of - Nothing - | numberLines fmtOpts -> Just - $ formatter fmtOpts{ codeClasses = [], - containerClasses = classes' } - $ map (\ln -> [(NormalTok, ln)]) $ T.lines rawCode' - | otherwise -> Nothing - Just syntax -> - case tokenize tokenizeOpts syntax rawCode' of - Right slines -> Just $ - formatter fmtOpts{ codeClasses = - [T.toLower (sShortname syntax)], - containerClasses = classes' } slines - Left _ -> Nothing - --- Functions for correlating latex listings package's language names --- with skylighting language names: - -langToListingsMap :: M.Map String String -langToListingsMap = M.fromList langsList - -listingsToLangMap :: M.Map String String -listingsToLangMap = M.fromList $ map switch langsList - where switch (a,b) = (b,a) - -langsList :: [(String, String)] -langsList = - [("abap","ABAP"), - ("acm","ACM"), - ("acmscript","ACMscript"), - ("acsl","ACSL"), - ("ada","Ada"), - ("algol","Algol"), - ("ant","Ant"), - ("assembler","Assembler"), - ("gnuassembler","Assembler"), - ("awk","Awk"), - ("bash","bash"), - ("monobasic","Basic"), - ("purebasic","Basic"), - ("c","C"), - ("cpp","C++"), - ("c++","C++"), - ("ocaml","Caml"), - ("cil","CIL"), - ("clean","Clean"), - ("cobol","Cobol"), - ("comal80","Comal80"), - ("command.com","command.com"), - ("comsol","Comsol"), - ("csh","csh"), - ("delphi","Delphi"), - ("elan","Elan"), - ("erlang","erlang"), - ("euphoria","Euphoria"), - ("fortran","Fortran"), - ("gap","GAP"), - ("gcl","GCL"), - ("gnuplot","Gnuplot"), - ("hansl","hansl"), - ("haskell","Haskell"), - ("html","HTML"), - ("idl","IDL"), - ("inform","inform"), - ("java","Java"), - ("jvmis","JVMIS"), - ("ksh","ksh"), - ("lingo","Lingo"), - ("lisp","Lisp"), - ("commonlisp","Lisp"), - ("llvm","LLVM"), - ("logo","Logo"), - ("lua","Lua"), - ("make","make"), - ("makefile","make"), - ("mathematica","Mathematica"), - ("matlab","Matlab"), - ("mercury","Mercury"), - ("metapost","MetaPost"), - ("miranda","Miranda"), - ("mizar","Mizar"), - ("ml","ML"), - ("modula2","Modula-2"), - ("mupad","MuPAD"), - ("nastran","NASTRAN"), - ("oberon2","Oberon-2"), - ("ocl","OCL"), - ("octave","Octave"), - ("oz","Oz"), - ("pascal","Pascal"), - ("perl","Perl"), - ("php","PHP"), - ("pli","PL/I"), - ("plasm","Plasm"), - ("postscript","PostScript"), - ("pov","POV"), - ("prolog","Prolog"), - ("promela","Promela"), - ("pstricks","PSTricks"), - ("python","Python"), - ("r","R"), - ("reduce","Reduce"), - ("rexx","Rexx"), - ("rsl","RSL"), - ("ruby","Ruby"), - ("s","S"), - ("sas","SAS"), - ("scala","Scala"), - ("scilab","Scilab"), - ("sh","sh"), - ("shelxl","SHELXL"), - ("simula","Simula"), - ("sparql","SPARQL"), - ("sql","SQL"), - ("tcl","tcl"), - ("tex","TeX"), - ("latex","TeX"), - ("vbscript","VBScript"), - ("verilog","Verilog"), - ("vhdl","VHDL"), - ("vrml","VRML"), - ("xml","XML"), - ("xslt","XSLT")] - --- | Determine listings language name from skylighting language name. -toListingsLanguage :: String -> Maybe String -toListingsLanguage lang = M.lookup (map toLower lang) langToListingsMap - --- | Determine skylighting language name from listings language name. -fromListingsLanguage :: String -> Maybe String -fromListingsLanguage lang = M.lookup lang listingsToLangMap diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs deleted file mode 100644 index cc22c06ca..000000000 --- a/src/Text/Pandoc/ImageSize.hs +++ /dev/null @@ -1,547 +0,0 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} -{-# OPTIONS_GHC -fno-warn-type-defaults #-} -{- - Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., 59 - Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | -Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2016 John MacFarlane -License : GNU GPL, version 2 or above - -Maintainer : John MacFarlane <jgm@berkeley.edu> -Stability : alpha -Portability : portable - -Functions for determining the size of a PNG, JPEG, or GIF image. --} -module Text.Pandoc.ImageSize ( ImageType(..) - , imageType - , imageSize - , sizeInPixels - , sizeInPoints - , desiredSizeInPoints - , Dimension(..) - , Direction(..) - , dimension - , inInch - , inPoints - , numUnit - , showInInch - , showInPixel - , showFl - ) where -import Data.ByteString (ByteString, unpack) -import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Lazy as BL -import Data.Char (isDigit) -import Control.Monad -import Data.Bits -import Data.Binary -import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) -import Data.Default (Default) -import Numeric (showFFloat) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import qualified Data.Map as M -import Control.Monad.Except -import Data.Maybe (fromMaybe) - --- quick and dirty functions to get image sizes --- algorithms borrowed from wwwis.pl - -data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show -data Direction = Width | Height -instance Show Direction where - show Width = "width" - show Height = "height" - -data Dimension = Pixel Integer - | Centimeter Double - | Inch Double - | Percent Double -instance Show Dimension where - show (Pixel a) = show a ++ "px" - show (Centimeter a) = showFl a ++ "cm" - show (Inch a) = showFl a ++ "in" - show (Percent a) = show a ++ "%" - -data ImageSize = ImageSize{ - pxX :: Integer - , pxY :: Integer - , dpiX :: Integer - , dpiY :: Integer - } deriving (Read, Show, Eq) -instance Default ImageSize where - def = ImageSize 300 200 72 72 - -showFl :: (RealFloat a) => a -> String -showFl a = showFFloat (Just 5) a "" - -imageType :: ByteString -> Maybe ImageType -imageType img = case B.take 4 img of - "\x89\x50\x4e\x47" -> return Png - "\x47\x49\x46\x38" -> return Gif - "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF - "\xff\xd8\xff\xe1" -> return Jpeg -- Exif - "%PDF" -> return Pdf - "%!PS" - | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" - -> return Eps - _ -> mzero - -imageSize :: ByteString -> Either String ImageSize -imageSize img = - case imageType img of - Just Png -> mbToEither "could not determine PNG size" $ pngSize img - Just Gif -> mbToEither "could not determine GIF size" $ gifSize img - Just Jpeg -> jpegSize img - Just Eps -> mbToEither "could not determine EPS size" $ epsSize img - Just Pdf -> Left "could not determine PDF size" -- TODO - Nothing -> Left "could not determine image type" - where mbToEither msg Nothing = Left msg - mbToEither _ (Just x) = Right x - -defaultSize :: (Integer, Integer) -defaultSize = (72, 72) - -sizeInPixels :: ImageSize -> (Integer, Integer) -sizeInPixels s = (pxX s, pxY s) - --- | Calculate (height, width) in points using the image file's dpi metadata, --- using 72 Points == 1 Inch. -sizeInPoints :: ImageSize -> (Double, Double) -sizeInPoints s = (pxXf * 72 / dpiXf, pxYf * 72 / dpiYf) - where - pxXf = fromIntegral $ pxX s - pxYf = fromIntegral $ pxY s - dpiXf = fromIntegral $ dpiX s - dpiYf = fromIntegral $ dpiY s - --- | Calculate (height, width) in points, considering the desired dimensions in the --- attribute, while falling back on the image file's dpi metadata if no dimensions --- are specified in the attribute (or only dimensions in percentages). -desiredSizeInPoints :: WriterOptions -> Attr -> ImageSize -> (Double, Double) -desiredSizeInPoints opts attr s = - case (getDim Width, getDim Height) of - (Just w, Just h) -> (w, h) - (Just w, Nothing) -> (w, w / ratio) - (Nothing, Just h) -> (h * ratio, h) - (Nothing, Nothing) -> sizeInPoints s - where - ratio = fromIntegral (pxX s) / fromIntegral (pxY s) - getDim dir = case (dimension dir attr) of - Just (Percent _) -> Nothing - Just dim -> Just $ inPoints opts dim - Nothing -> Nothing - -inPoints :: WriterOptions -> Dimension -> Double -inPoints opts dim = 72 * inInch opts dim - -inInch :: WriterOptions -> Dimension -> Double -inInch opts dim = - case dim of - (Pixel a) -> fromIntegral a / (fromIntegral $ writerDpi opts) - (Centimeter a) -> a * 0.3937007874 - (Inch a) -> a - (Percent _) -> 0 - --- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000". --- Note: Dimensions in percentages are converted to the empty string. -showInInch :: WriterOptions -> Dimension -> String -showInInch _ (Percent _) = "" -showInInch opts dim = showFl $ inInch opts dim - --- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600". --- Note: Dimensions in percentages are converted to the empty string. -showInPixel :: WriterOptions -> Dimension -> String -showInPixel opts dim = - case dim of - (Pixel a) -> show a - (Centimeter a) -> show (floor $ dpi * a * 0.3937007874 :: Int) - (Inch a) -> show (floor $ dpi * a :: Int) - (Percent _) -> "" - where - dpi = fromIntegral $ writerDpi opts - --- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm") -numUnit :: String -> Maybe (Double, String) -numUnit s = - let (nums, unit) = span (\c -> isDigit c || ('.'==c)) s - in case safeRead nums of - Just n -> Just (n, unit) - Nothing -> Nothing - --- | Read a Dimension from an Attr attribute. --- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc. -dimension :: Direction -> Attr -> Maybe Dimension -dimension dir (_, _, kvs) = - case dir of - Width -> extractDim "width" - Height -> extractDim "height" - where - extractDim key = - case lookup key kvs of - Just str -> - case numUnit str of - Just (num, unit) -> toDim num unit - Nothing -> Nothing - Nothing -> Nothing - toDim a "cm" = Just $ Centimeter a - toDim a "mm" = Just $ Centimeter (a / 10) - toDim a "in" = Just $ Inch a - toDim a "inch" = Just $ Inch a - toDim a "%" = Just $ Percent a - toDim a "px" = Just $ Pixel (floor a::Integer) - toDim a "" = Just $ Pixel (floor a::Integer) - toDim _ _ = Nothing - -epsSize :: ByteString -> Maybe ImageSize -epsSize img = do - let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img - let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls - case ls' of - [] -> mzero - (x:_) -> case B.words x of - (_:_:_:ux:uy:[]) -> do - ux' <- safeRead $ B.unpack ux - uy' <- safeRead $ B.unpack uy - return ImageSize{ - pxX = ux' - , pxY = uy' - , dpiX = 72 - , dpiY = 72 } - _ -> mzero - -pngSize :: ByteString -> Maybe ImageSize -pngSize img = do - let (h, rest) = B.splitAt 8 img - guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" || - h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a" - let (i, rest') = B.splitAt 4 $ B.drop 4 rest - guard $ i == "MHDR" || i == "IHDR" - let (sizes, rest'') = B.splitAt 8 rest' - (x,y) <- case map fromIntegral $ unpack $ sizes of - ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return - ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, - (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> Nothing -- "PNG parse error" - let (dpix, dpiy) = findpHYs rest'' - return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } - -findpHYs :: ByteString -> (Integer, Integer) -findpHYs x = - if B.null x || "IDAT" `B.isPrefixOf` x - then (72,72) -- default, no pHYs - else if "pHYs" `B.isPrefixOf` x - then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral - $ unpack $ B.take 9 $ B.drop 4 x - factor = if u == 1 -- dots per meter - then \z -> z * 254 `div` 10000 - else const 72 - in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4, - factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 ) - else findpHYs $ B.drop 1 x -- read another byte - -gifSize :: ByteString -> Maybe ImageSize -gifSize img = do - let (h, rest) = B.splitAt 6 img - guard $ h == "GIF87a" || h == "GIF89a" - case map fromIntegral $ unpack $ B.take 4 rest of - [w2,w1,h2,h1] -> return ImageSize { - pxX = shift w1 8 + w2, - pxY = shift h1 8 + h2, - dpiX = 72, - dpiY = 72 - } - _ -> Nothing -- "GIF parse error" - -jpegSize :: ByteString -> Either String ImageSize -jpegSize img = - let (hdr, rest) = B.splitAt 4 img - in if B.length rest < 14 - then Left "unable to determine JPEG size" - else case hdr of - "\xff\xd8\xff\xe0" -> jfifSize rest - "\xff\xd8\xff\xe1" -> exifSize rest - _ -> Left "unable to determine JPEG size" - -jfifSize :: ByteString -> Either String ImageSize -jfifSize rest = - let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral - $ unpack $ B.take 5 $ B.drop 9 $ rest - factor = case dpiDensity of - 1 -> id - 2 -> \x -> (x * 254 `div` 10) - _ -> const 72 - dpix = factor (shift dpix1 8 + dpix2) - dpiy = factor (shift dpiy1 8 + dpiy2) - in case findJfifSize rest of - Left msg -> Left msg - Right (w,h) -> Right $ ImageSize { pxX = w - , pxY = h - , dpiX = dpix - , dpiY = dpiy } - -findJfifSize :: ByteString -> Either String (Integer,Integer) -findJfifSize bs = - let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs - in case B.uncons bs' of - Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> - case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of - [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2) - _ -> Left "JFIF parse error" - Just (_,bs'') -> - case map fromIntegral $ unpack $ B.take 2 bs'' of - [c1,c2] -> - let len = shift c1 8 + c2 - -- skip variables - in findJfifSize $ B.drop len bs'' - _ -> Left "JFIF parse error" - Nothing -> Left "Did not find JFIF length record" - -runGet' :: Get (Either String a) -> BL.ByteString -> Either String a -runGet' p bl = -#if MIN_VERSION_binary(0,7,0) - case runGetOrFail p bl of - Left (_,_,msg) -> Left msg - Right (_,_,x) -> x -#else - runGet p bl -#endif - - -exifSize :: ByteString -> Either String ImageSize -exifSize bs = runGet' header $ bl - where bl = BL.fromChunks [bs] - header = runExceptT $ exifHeader bl --- NOTE: It would be nicer to do --- runGet ((Just <$> exifHeader) <|> return Nothing) --- which would prevent pandoc from raising an error when an exif header can't --- be parsed. But we only get an Alternative instance for Get in binary 0.6, --- and binary 0.5 ships with ghc 7.6. - -exifHeader :: BL.ByteString -> ExceptT String Get ImageSize -exifHeader hdr = do - _app1DataSize <- lift getWord16be - exifHdr <- lift getWord32be - unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" - zeros <- lift getWord16be - unless (zeros == 0) $ throwError "Expected zeros after exif header" - -- beginning of tiff header -- we read whole thing to use - -- in getting data from offsets: - let tiffHeader = BL.drop 8 hdr - byteAlign <- lift getWord16be - let bigEndian = byteAlign == 0x4d4d - let (getWord16, getWord32, getWord64) = - if bigEndian - then (getWord16be, getWord32be, getWord64be) - else (getWord16le, getWord32le, getWord64le) - let getRational = do - num <- getWord32 - den <- getWord32 - return $ fromIntegral num / fromIntegral den - tagmark <- lift getWord16 - unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" - ifdOffset <- lift getWord32 - lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- lift getWord16 - let ifdEntry :: ExceptT String Get (TagType, DataFormat) - ifdEntry = do - tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable - <$> lift getWord16 - dataFormat <- lift getWord16 - numComponents <- lift getWord32 - (fmt, bytesPerComponent) <- - case dataFormat of - 1 -> return (UnsignedByte <$> getWord8, 1) - 2 -> return (AsciiString <$> - getLazyByteString - (fromIntegral numComponents), 1) - 3 -> return (UnsignedShort <$> getWord16, 2) - 4 -> return (UnsignedLong <$> getWord32, 4) - 5 -> return (UnsignedRational <$> getRational, 8) - 6 -> return (SignedByte <$> getWord8, 1) - 7 -> return (Undefined <$> getLazyByteString - (fromIntegral numComponents), 1) - 8 -> return (SignedShort <$> getWord16, 2) - 9 -> return (SignedLong <$> getWord32, 4) - 10 -> return (SignedRational <$> getRational, 8) - 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4) - 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8) - _ -> throwError $ "Unknown data format " ++ show dataFormat - let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here - then lift $ fmt <* skip (4 - totalBytes) - else do -- get data from offset - offs <- lift getWord32 - let bytesAtOffset = - BL.take (fromIntegral totalBytes) - $ BL.drop (fromIntegral offs) tiffHeader - case runGet' (Right <$> fmt) bytesAtOffset of - Left msg -> throwError msg - Right x -> return x - return (tag, payload) - entries <- sequence $ replicate (fromIntegral numentries) ifdEntry - subentries <- case lookup ExifOffset entries of - Just (UnsignedLong offset') -> do - pos <- lift bytesRead - lift $ skip (fromIntegral offset' - (fromIntegral pos - 8)) - numsubentries <- lift getWord16 - sequence $ - replicate (fromIntegral numsubentries) ifdEntry - _ -> return [] - let allentries = entries ++ subentries - (wdth, hght) <- case (lookup ExifImageWidth allentries, - lookup ExifImageHeight allentries) of - (Just (UnsignedLong w), Just (UnsignedLong h)) -> - return (fromIntegral w, fromIntegral h) - _ -> return defaultSize - -- we return a default width and height when - -- the exif header doesn't contain these - let resfactor = case lookup ResolutionUnit allentries of - Just (UnsignedShort 1) -> (100 / 254) - _ -> 1 - let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) - $ lookup XResolution allentries - let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) - $ lookup YResolution allentries - return $ ImageSize{ - pxX = wdth - , pxY = hght - , dpiX = xres - , dpiY = yres } - -data DataFormat = UnsignedByte Word8 - | AsciiString BL.ByteString - | UnsignedShort Word16 - | UnsignedLong Word32 - | UnsignedRational Rational - | SignedByte Word8 - | Undefined BL.ByteString - | SignedShort Word16 - | SignedLong Word32 - | SignedRational Rational - | SingleFloat Word32 - | DoubleFloat Word64 - deriving (Show) - -data TagType = ImageDescription - | Make - | Model - | Orientation - | XResolution - | YResolution - | ResolutionUnit - | Software - | DateTime - | WhitePoint - | PrimaryChromaticities - | YCbCrCoefficients - | YCbCrPositioning - | ReferenceBlackWhite - | Copyright - | ExifOffset - | ExposureTime - | FNumber - | ExposureProgram - | ISOSpeedRatings - | ExifVersion - | DateTimeOriginal - | DateTimeDigitized - | ComponentConfiguration - | CompressedBitsPerPixel - | ShutterSpeedValue - | ApertureValue - | BrightnessValue - | ExposureBiasValue - | MaxApertureValue - | SubjectDistance - | MeteringMode - | LightSource - | Flash - | FocalLength - | MakerNote - | UserComment - | FlashPixVersion - | ColorSpace - | ExifImageWidth - | ExifImageHeight - | RelatedSoundFile - | ExifInteroperabilityOffset - | FocalPlaneXResolution - | FocalPlaneYResolution - | FocalPlaneResolutionUnit - | SensingMethod - | FileSource - | SceneType - | UnknownTagType - deriving (Show, Eq, Ord) - -tagTypeTable :: M.Map Word16 TagType -tagTypeTable = M.fromList - [ (0x010e, ImageDescription) - , (0x010f, Make) - , (0x0110, Model) - , (0x0112, Orientation) - , (0x011a, XResolution) - , (0x011b, YResolution) - , (0x0128, ResolutionUnit) - , (0x0131, Software) - , (0x0132, DateTime) - , (0x013e, WhitePoint) - , (0x013f, PrimaryChromaticities) - , (0x0211, YCbCrCoefficients) - , (0x0213, YCbCrPositioning) - , (0x0214, ReferenceBlackWhite) - , (0x8298, Copyright) - , (0x8769, ExifOffset) - , (0x829a, ExposureTime) - , (0x829d, FNumber) - , (0x8822, ExposureProgram) - , (0x8827, ISOSpeedRatings) - , (0x9000, ExifVersion) - , (0x9003, DateTimeOriginal) - , (0x9004, DateTimeDigitized) - , (0x9101, ComponentConfiguration) - , (0x9102, CompressedBitsPerPixel) - , (0x9201, ShutterSpeedValue) - , (0x9202, ApertureValue) - , (0x9203, BrightnessValue) - , (0x9204, ExposureBiasValue) - , (0x9205, MaxApertureValue) - , (0x9206, SubjectDistance) - , (0x9207, MeteringMode) - , (0x9208, LightSource) - , (0x9209, Flash) - , (0x920a, FocalLength) - , (0x927c, MakerNote) - , (0x9286, UserComment) - , (0xa000, FlashPixVersion) - , (0xa001, ColorSpace) - , (0xa002, ExifImageWidth) - , (0xa003, ExifImageHeight) - , (0xa004, RelatedSoundFile) - , (0xa005, ExifInteroperabilityOffset) - , (0xa20e, FocalPlaneXResolution) - , (0xa20f, FocalPlaneYResolution) - , (0xa210, FocalPlaneResolutionUnit) - , (0xa217, SensingMethod) - , (0xa300, FileSource) - , (0xa301, SceneType) - ] diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs deleted file mode 100644 index 1f98d019e..000000000 --- a/src/Text/Pandoc/Logging.hs +++ /dev/null @@ -1,232 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-} -{- -Copyright (C) 2016-17 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} -{- | - Module : Text.Pandoc.Logging - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -This module provides data types and functions for warnings -and info messages. - --} -module Text.Pandoc.Logging ( - Verbosity(..) - , LogMessage(..) - , encodeLogMessages - , showLogMessage - , messageVerbosity - ) where - -import Text.Parsec.Pos -import Data.Data (Data) -import Data.Generics (Typeable) -import GHC.Generics (Generic) -import qualified Data.Text as Text -import Data.Aeson -import Text.Pandoc.Definition -import Data.Aeson.Encode.Pretty (encodePretty', keyOrder, - defConfig, Config(..)) -import qualified Data.ByteString.Lazy as BL - --- | Verbosity level. -data Verbosity = ERROR | WARNING | INFO | DEBUG - deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic) - -instance ToJSON Verbosity where - toJSON x = toJSON (show x) - -data LogMessage = - SkippedContent String SourcePos - | CouldNotParseYamlMetadata String SourcePos - | DuplicateLinkReference String SourcePos - | DuplicateNoteReference String SourcePos - | ReferenceNotFound String SourcePos - | CircularReference String SourcePos - | ParsingUnescaped String SourcePos - | CouldNotLoadIncludeFile String SourcePos - | ParsingTrace String SourcePos - | InlineNotRendered Inline - | BlockNotRendered Block - | DocxParserWarning String - | CouldNotFetchResource String String - | CouldNotDetermineImageSize String String - | CouldNotDetermineMimeType String - | CouldNotConvertTeXMath String String - deriving (Show, Eq, Data, Ord, Typeable, Generic) - -instance ToJSON LogMessage where - toJSON x = object $ "verbosity" .= toJSON (messageVerbosity x) : - case x of - SkippedContent s pos -> - ["type" .= String "SkippedContent", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= sourceLine pos, - "column" .= sourceColumn pos] - CouldNotParseYamlMetadata s pos -> - ["type" .= String "YamlSectionNotAnObject", - "message" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - DuplicateLinkReference s pos -> - ["type" .= String "DuplicateLinkReference", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - DuplicateNoteReference s pos -> - ["type" .= String "DuplicateNoteReference", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - ReferenceNotFound s pos -> - ["type" .= String "ReferenceNotFound", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - CircularReference s pos -> - ["type" .= String "CircularReference", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - ParsingUnescaped s pos -> - ["type" .= String "ParsingUnescaped", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - CouldNotLoadIncludeFile fp pos -> - ["type" .= String "CouldNotLoadIncludeFile", - "path" .= Text.pack fp, - "source" .= Text.pack (sourceName pos), - "line" .= toJSON (sourceLine pos), - "column" .= toJSON (sourceColumn pos)] - ParsingTrace s pos -> - ["type" .= String "ParsingTrace", - "contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), - "line" .= sourceLine pos, - "column" .= sourceColumn pos] - InlineNotRendered il -> - ["type" .= String "InlineNotRendered", - "contents" .= toJSON il] - BlockNotRendered bl -> - ["type" .= String "BlockNotRendered", - "contents" .= toJSON bl] - DocxParserWarning s -> - ["type" .= String "DocxParserWarning", - "contents" .= Text.pack s] - CouldNotFetchResource fp s -> - ["type" .= String "CouldNotFetchResource", - "path" .= Text.pack fp, - "message" .= Text.pack s] - CouldNotDetermineImageSize fp s -> - ["type" .= String "CouldNotDetermineImageSize", - "path" .= Text.pack fp, - "message" .= Text.pack s] - CouldNotDetermineMimeType fp -> - ["type" .= String "CouldNotDetermineMimeType", - "path" .= Text.pack fp] - CouldNotConvertTeXMath s msg -> - ["type" .= String "CouldNotConvertTeXMath", - "contents" .= Text.pack s, - "message" .= Text.pack msg] - -showPos :: SourcePos -> String -showPos pos = sn ++ "line " ++ - show (sourceLine pos) ++ " column " ++ show (sourceColumn pos) - where sn = if sourceName pos == "source" || sourceName pos == "" - then "" - else sourceName pos ++ " " - -encodeLogMessages :: [LogMessage] -> BL.ByteString -encodeLogMessages ms = - encodePretty' defConfig{ confCompare = - keyOrder [ "type", "verbosity", "contents", "message", "path", - "source", "line", "column" ] } ms - -showLogMessage :: LogMessage -> String -showLogMessage msg = - case msg of - SkippedContent s pos -> - "Skipped '" ++ s ++ "' at " ++ showPos pos - CouldNotParseYamlMetadata s pos -> - "Could not parse YAML metadata at " ++ showPos pos ++ - if null s then "" else (": " ++ s) - DuplicateLinkReference s pos -> - "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos - DuplicateNoteReference s pos -> - "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos - ReferenceNotFound s pos -> - "Reference not found for '" ++ s ++ "' at " ++ showPos pos - CircularReference s pos -> - "Circular reference '" ++ s ++ "' at " ++ showPos pos - ParsingUnescaped s pos -> - "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos - CouldNotLoadIncludeFile fp pos -> - "Could not load include file '" ++ fp ++ "' at " ++ showPos pos - ParsingTrace s pos -> - "Parsing trace at " ++ showPos pos ++ ": " ++ s - InlineNotRendered il -> - "Not rendering " ++ show il - BlockNotRendered bl -> - "Not rendering " ++ show bl - DocxParserWarning s -> - "Docx parser warning: " ++ s - CouldNotFetchResource fp s -> - "Could not fetch resource '" ++ fp ++ "'" ++ - if null s then "" else (": " ++ s) - CouldNotDetermineImageSize fp s -> - "Could not determine image size for '" ++ fp ++ "'" ++ - if null s then "" else (": " ++ s) - CouldNotDetermineMimeType fp -> - "Could not determine mime type for '" ++ fp ++ "'" - CouldNotConvertTeXMath s m -> - "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++ - if null m then "" else (':':'\n':m) - -messageVerbosity:: LogMessage -> Verbosity -messageVerbosity msg = - case msg of - SkippedContent{} -> INFO - CouldNotParseYamlMetadata{} -> WARNING - DuplicateLinkReference{} -> WARNING - DuplicateNoteReference{} -> WARNING - ReferenceNotFound{} -> WARNING - CircularReference{} -> WARNING - CouldNotLoadIncludeFile{} -> WARNING - ParsingUnescaped{} -> INFO - ParsingTrace{} -> DEBUG - InlineNotRendered{} -> INFO - BlockNotRendered{} -> INFO - DocxParserWarning{} -> WARNING - CouldNotFetchResource{} -> WARNING - CouldNotDetermineImageSize{} -> WARNING - CouldNotDetermineMimeType{} -> WARNING - CouldNotConvertTeXMath{} -> WARNING - - diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs deleted file mode 100644 index a08091217..000000000 --- a/src/Text/Pandoc/MIME.hs +++ /dev/null @@ -1,527 +0,0 @@ -{- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Mime type lookup for ODT writer. --} -module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, - extensionFromMimeType )where -import System.FilePath -import Data.Char ( toLower ) -import Data.List (isPrefixOf, isSuffixOf) -import Data.Maybe (fromMaybe) -import qualified Data.Map as M - -type MimeType = String - --- | Determine mime type appropriate for file path. -getMimeType :: FilePath -> Maybe MimeType -getMimeType fp - -- ODT - | fp == "layout-cache" = - Just "application/binary" - | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp = - Just "application/vnd.oasis.opendocument.formula" - -- generic - | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes - --- | Determime mime type appropriate for file path, defaulting to --- “application/octet-stream” if nothing else fits. -getMimeTypeDef :: FilePath -> MimeType -getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType - -extensionFromMimeType :: MimeType -> Maybe String -extensionFromMimeType mimetype = - M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes - -- note: we just look up the basic mime type, dropping the content-encoding etc. - -reverseMimeTypes :: M.Map MimeType String -reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList - -mimeTypes :: M.Map String MimeType -mimeTypes = M.fromList mimeTypesList - -mimeTypesList :: [(String, MimeType)] -mimeTypesList = -- List borrowed from happstack-server. - [("gz","application/x-gzip") - ,("cabal","application/x-cabal") - ,("%","application/x-trash") - ,("323","text/h323") - ,("3gp","video/3gpp") - ,("7z","application/x-7z-compressed") - ,("abw","application/x-abiword") - ,("ai","application/postscript") - ,("aif","audio/x-aiff") - ,("aifc","audio/x-aiff") - ,("aiff","audio/x-aiff") - ,("alc","chemical/x-alchemy") - ,("art","image/x-jg") - ,("asc","text/plain") - ,("asf","video/x-ms-asf") - ,("asn","chemical/x-ncbi-asn1") - ,("aso","chemical/x-ncbi-asn1-binary") - ,("asx","video/x-ms-asf") - ,("atom","application/atom") - ,("atomcat","application/atomcat+xml") - ,("atomsrv","application/atomserv+xml") - ,("au","audio/basic") - ,("avi","video/x-msvideo") - ,("b","chemical/x-molconn-Z") - ,("bak","application/x-trash") - ,("bat","application/x-msdos-program") - ,("bcpio","application/x-bcpio") - ,("bib","text/x-bibtex") - ,("bin","application/octet-stream") - ,("bmp","image/x-ms-bmp") - ,("boo","text/x-boo") - ,("book","application/x-maker") - ,("bsd","chemical/x-crossfire") - ,("c","text/x-csrc") - ,("c++","text/x-c++src") - ,("c3d","chemical/x-chem3d") - ,("cab","application/x-cab") - ,("cac","chemical/x-cache") - ,("cache","chemical/x-cache") - ,("cap","application/cap") - ,("cascii","chemical/x-cactvs-binary") - ,("cat","application/vnd.ms-pki.seccat") - ,("cbin","chemical/x-cactvs-binary") - ,("cbr","application/x-cbr") - ,("cbz","application/x-cbz") - ,("cc","text/x-c++src") - ,("cdf","application/x-cdf") - ,("cdr","image/x-coreldraw") - ,("cdt","image/x-coreldrawtemplate") - ,("cdx","chemical/x-cdx") - ,("cdy","application/vnd.cinderella") - ,("cef","chemical/x-cxf") - ,("cer","chemical/x-cerius") - ,("chm","chemical/x-chemdraw") - ,("chrt","application/x-kchart") - ,("cif","chemical/x-cif") - ,("class","application/java-vm") - ,("cls","text/x-tex") - ,("cmdf","chemical/x-cmdf") - ,("cml","chemical/x-cml") - ,("cod","application/vnd.rim.cod") - ,("com","application/x-msdos-program") - ,("cpa","chemical/x-compass") - ,("cpio","application/x-cpio") - ,("cpp","text/x-c++src") - ,("cpt","application/mac-compactpro") - ,("crl","application/x-pkcs7-crl") - ,("crt","application/x-x509-ca-cert") - ,("csf","chemical/x-cache-csf") - ,("csh","application/x-csh") - ,("csm","chemical/x-csml") - ,("csml","chemical/x-csml") - ,("css","text/css") - ,("csv","text/csv") - ,("ctab","chemical/x-cactvs-binary") - ,("ctx","chemical/x-ctx") - ,("cu","application/cu-seeme") - ,("cub","chemical/x-gaussian-cube") - ,("cxf","chemical/x-cxf") - ,("cxx","text/x-c++src") - ,("d","text/x-dsrc") - ,("dat","chemical/x-mopac-input") - ,("dcr","application/x-director") - ,("deb","application/x-debian-package") - ,("dif","video/dv") - ,("diff","text/x-diff") - ,("dir","application/x-director") - ,("djv","image/vnd.djvu") - ,("djvu","image/vnd.djvu") - ,("dl","video/dl") - ,("dll","application/x-msdos-program") - ,("dmg","application/x-apple-diskimage") - ,("dms","application/x-dms") - ,("doc","application/msword") - ,("dot","application/msword") - ,("dv","video/dv") - ,("dvi","application/x-dvi") - ,("dx","chemical/x-jcamp-dx") - ,("dxr","application/x-director") - ,("emb","chemical/x-embl-dl-nucleotide") - ,("embl","chemical/x-embl-dl-nucleotide") - ,("emf","image/x-emf") - ,("eml","message/rfc822") - ,("ent","chemical/x-ncbi-asn1-ascii") - ,("eot","application/vnd.ms-fontobject") - ,("eps","application/postscript") - ,("etx","text/x-setext") - ,("exe","application/x-msdos-program") - ,("ez","application/andrew-inset") - ,("fb","application/x-maker") - ,("fbdoc","application/x-maker") - ,("fch","chemical/x-gaussian-checkpoint") - ,("fchk","chemical/x-gaussian-checkpoint") - ,("fig","application/x-xfig") - ,("flac","application/x-flac") - ,("fli","video/fli") - ,("fm","application/x-maker") - ,("frame","application/x-maker") - ,("frm","application/x-maker") - ,("fs","text/plain") - ,("gal","chemical/x-gaussian-log") - ,("gam","chemical/x-gamess-input") - ,("gamin","chemical/x-gamess-input") - ,("gau","chemical/x-gaussian-input") - ,("gcd","text/x-pcs-gcd") - ,("gcf","application/x-graphing-calculator") - ,("gcg","chemical/x-gcg8-sequence") - ,("gen","chemical/x-genbank") - ,("gf","application/x-tex-gf") - ,("gif","image/gif") - ,("gjc","chemical/x-gaussian-input") - ,("gjf","chemical/x-gaussian-input") - ,("gl","video/gl") - ,("gnumeric","application/x-gnumeric") - ,("gpt","chemical/x-mopac-graph") - ,("gsf","application/x-font") - ,("gsm","audio/x-gsm") - ,("gtar","application/x-gtar") - ,("h","text/x-chdr") - ,("h++","text/x-c++hdr") - ,("hdf","application/x-hdf") - ,("hh","text/x-c++hdr") - ,("hin","chemical/x-hin") - ,("hpp","text/x-c++hdr") - ,("hqx","application/mac-binhex40") - ,("hs","text/x-haskell") - ,("hta","application/hta") - ,("htc","text/x-component") - ,("htm","text/html") - ,("html","text/html") - ,("hxx","text/x-c++hdr") - ,("ica","application/x-ica") - ,("ice","x-conference/x-cooltalk") - ,("ico","image/x-icon") - ,("ics","text/calendar") - ,("icz","text/calendar") - ,("ief","image/ief") - ,("iges","model/iges") - ,("igs","model/iges") - ,("iii","application/x-iphone") - ,("inp","chemical/x-gamess-input") - ,("ins","application/x-internet-signup") - ,("iso","application/x-iso9660-image") - ,("isp","application/x-internet-signup") - ,("ist","chemical/x-isostar") - ,("istr","chemical/x-isostar") - ,("jad","text/vnd.sun.j2me.app-descriptor") - ,("jar","application/java-archive") - ,("java","text/x-java") - ,("jdx","chemical/x-jcamp-dx") - ,("jmz","application/x-jmol") - ,("jng","image/x-jng") - ,("jnlp","application/x-java-jnlp-file") - ,("jpe","image/jpeg") - ,("jpeg","image/jpeg") - ,("jfif","image/jpeg") - ,("jpg","image/jpeg") - ,("js","application/x-javascript") - ,("kar","audio/midi") - ,("key","application/pgp-keys") - ,("kil","application/x-killustrator") - ,("kin","chemical/x-kinemage") - ,("kml","application/vnd.google-earth.kml+xml") - ,("kmz","application/vnd.google-earth.kmz") - ,("kpr","application/x-kpresenter") - ,("kpt","application/x-kpresenter") - ,("ksp","application/x-kspread") - ,("kwd","application/x-kword") - ,("kwt","application/x-kword") - ,("latex","application/x-latex") - ,("lha","application/x-lha") - ,("lhs","text/x-literate-haskell") - ,("lsf","video/x-la-asf") - ,("lsx","video/x-la-asf") - ,("ltx","text/x-tex") - ,("lyx","application/x-lyx") - ,("lzh","application/x-lzh") - ,("lzx","application/x-lzx") - ,("m3u","audio/mpegurl") - ,("m4a","audio/mpeg") - ,("m4v","video/x-m4v") - ,("maker","application/x-maker") - ,("man","application/x-troff-man") - ,("mcif","chemical/x-mmcif") - ,("mcm","chemical/x-macmolecule") - ,("mdb","application/msaccess") - ,("me","application/x-troff-me") - ,("mesh","model/mesh") - ,("mid","audio/midi") - ,("midi","audio/midi") - ,("mif","application/x-mif") - ,("mm","application/x-freemind") - ,("mmd","chemical/x-macromodel-input") - ,("mmf","application/vnd.smaf") - ,("mml","text/mathml") - ,("mmod","chemical/x-macromodel-input") - ,("mng","video/x-mng") - ,("moc","text/x-moc") - ,("mol","chemical/x-mdl-molfile") - ,("mol2","chemical/x-mol2") - ,("moo","chemical/x-mopac-out") - ,("mop","chemical/x-mopac-input") - ,("mopcrt","chemical/x-mopac-input") - ,("mov","video/quicktime") - ,("movie","video/x-sgi-movie") - ,("mp2","audio/mpeg") - ,("mp3","audio/mpeg") - ,("mp4","video/mp4") - ,("mpc","chemical/x-mopac-input") - ,("mpe","video/mpeg") - ,("mpeg","video/mpeg") - ,("mpega","audio/mpeg") - ,("mpg","video/mpeg") - ,("mpga","audio/mpeg") - ,("ms","application/x-troff-ms") - ,("msh","model/mesh") - ,("msi","application/x-msi") - ,("mvb","chemical/x-mopac-vib") - ,("mxu","video/vnd.mpegurl") - ,("nb","application/mathematica") - ,("nc","application/x-netcdf") - ,("nwc","application/x-nwc") - ,("o","application/x-object") - ,("oda","application/oda") - ,("odb","application/vnd.oasis.opendocument.database") - ,("odc","application/vnd.oasis.opendocument.chart") - ,("odf","application/vnd.oasis.opendocument.formula") - ,("odg","application/vnd.oasis.opendocument.graphics") - ,("odi","application/vnd.oasis.opendocument.image") - ,("odm","application/vnd.oasis.opendocument.text-master") - ,("odp","application/vnd.oasis.opendocument.presentation") - ,("ods","application/vnd.oasis.opendocument.spreadsheet") - ,("odt","application/vnd.oasis.opendocument.text") - ,("oga","audio/ogg") - ,("ogg","application/ogg") - ,("ogv","video/ogg") - ,("ogx","application/ogg") - ,("old","application/x-trash") - ,("otg","application/vnd.oasis.opendocument.graphics-template") - ,("oth","application/vnd.oasis.opendocument.text-web") - ,("otp","application/vnd.oasis.opendocument.presentation-template") - ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") - ,("otf","application/vnd.ms-opentype") - ,("ott","application/vnd.oasis.opendocument.text-template") - ,("oza","application/x-oz-application") - ,("p","text/x-pascal") - ,("p7r","application/x-pkcs7-certreqresp") - ,("pac","application/x-ns-proxy-autoconfig") - ,("pas","text/x-pascal") - ,("pat","image/x-coreldrawpattern") - ,("patch","text/x-diff") - ,("pbm","image/x-portable-bitmap") - ,("pcap","application/cap") - ,("pcf","application/x-font") - ,("pcf.Z","application/x-font") - ,("pcx","image/pcx") - ,("pdb","chemical/x-pdb") - ,("pdf","application/pdf") - ,("pfa","application/x-font") - ,("pfb","application/x-font") - ,("pgm","image/x-portable-graymap") - ,("pgn","application/x-chess-pgn") - ,("pgp","application/pgp-signature") - ,("php","application/x-httpd-php") - ,("php3","application/x-httpd-php3") - ,("php3p","application/x-httpd-php3-preprocessed") - ,("php4","application/x-httpd-php4") - ,("phps","application/x-httpd-php-source") - ,("pht","application/x-httpd-php") - ,("phtml","application/x-httpd-php") - ,("pk","application/x-tex-pk") - ,("pl","text/x-perl") - ,("pls","audio/x-scpls") - ,("pm","text/x-perl") - ,("png","image/png") - ,("pnm","image/x-portable-anymap") - ,("pot","text/plain") - ,("ppm","image/x-portable-pixmap") - ,("pps","application/vnd.ms-powerpoint") - ,("ppt","application/vnd.ms-powerpoint") - ,("prf","application/pics-rules") - ,("prt","chemical/x-ncbi-asn1-ascii") - ,("ps","application/postscript") - ,("psd","image/x-photoshop") - ,("py","text/x-python") - ,("pyc","application/x-python-code") - ,("pyo","application/x-python-code") - ,("qt","video/quicktime") - ,("qtl","application/x-quicktimeplayer") - ,("ra","audio/x-pn-realaudio") - ,("ram","audio/x-pn-realaudio") - ,("rar","application/rar") - ,("ras","image/x-cmu-raster") - ,("rd","chemical/x-mdl-rdfile") - ,("rdf","application/rdf+xml") - ,("rgb","image/x-rgb") - ,("rhtml","application/x-httpd-eruby") - ,("rm","audio/x-pn-realaudio") - ,("roff","application/x-troff") - ,("ros","chemical/x-rosdal") - ,("rpm","application/x-redhat-package-manager") - ,("rss","application/rss+xml") - ,("rtf","application/rtf") - ,("rtx","text/richtext") - ,("rxn","chemical/x-mdl-rxnfile") - ,("sct","text/scriptlet") - ,("sd","chemical/x-mdl-sdfile") - ,("sd2","audio/x-sd2") - ,("sda","application/vnd.stardivision.draw") - ,("sdc","application/vnd.stardivision.calc") - ,("sdd","application/vnd.stardivision.impress") - ,("sdf","application/vnd.stardivision.math") - ,("sds","application/vnd.stardivision.chart") - ,("sdw","application/vnd.stardivision.writer") - ,("ser","application/java-serialized-object") - ,("sgf","application/x-go-sgf") - ,("sgl","application/vnd.stardivision.writer-global") - ,("sh","application/x-sh") - ,("shar","application/x-shar") - ,("shtml","text/html") - ,("sid","audio/prs.sid") - ,("sik","application/x-trash") - ,("silo","model/mesh") - ,("sis","application/vnd.symbian.install") - ,("sisx","x-epoc/x-sisx-app") - ,("sit","application/x-stuffit") - ,("sitx","application/x-stuffit") - ,("skd","application/x-koan") - ,("skm","application/x-koan") - ,("skp","application/x-koan") - ,("skt","application/x-koan") - ,("smi","application/smil") - ,("smil","application/smil") - ,("snd","audio/basic") - ,("spc","chemical/x-galactic-spc") - ,("spl","application/futuresplash") - ,("spx","audio/ogg") - ,("src","application/x-wais-source") - ,("stc","application/vnd.sun.xml.calc.template") - ,("std","application/vnd.sun.xml.draw.template") - ,("sti","application/vnd.sun.xml.impress.template") - ,("stl","application/vnd.ms-pki.stl") - ,("stw","application/vnd.sun.xml.writer.template") - ,("sty","text/x-tex") - ,("sv4cpio","application/x-sv4cpio") - ,("sv4crc","application/x-sv4crc") - ,("svg","image/svg+xml") - -- removed for now, since it causes problems with - -- extensionFromMimeType: see #2183. - -- ,("svgz","image/svg+xml") - ,("sw","chemical/x-swissprot") - ,("swf","application/x-shockwave-flash") - ,("swfl","application/x-shockwave-flash") - ,("sxc","application/vnd.sun.xml.calc") - ,("sxd","application/vnd.sun.xml.draw") - ,("sxg","application/vnd.sun.xml.writer.global") - ,("sxi","application/vnd.sun.xml.impress") - ,("sxm","application/vnd.sun.xml.math") - ,("sxw","application/vnd.sun.xml.writer") - ,("t","application/x-troff") - ,("tar","application/x-tar") - ,("taz","application/x-gtar") - ,("tcl","application/x-tcl") - ,("tex","text/x-tex") - ,("texi","application/x-texinfo") - ,("texinfo","application/x-texinfo") - ,("text","text/plain") - ,("tgf","chemical/x-mdl-tgf") - ,("tgz","application/x-gtar") - ,("tif","image/tiff") - ,("tiff","image/tiff") - ,("tk","text/x-tcl") - ,("tm","text/texmacs") - ,("torrent","application/x-bittorrent") - ,("tr","application/x-troff") - ,("ts","text/texmacs") - ,("tsp","application/dsptype") - ,("tsv","text/tab-separated-values") - ,("ttf","application/x-font-truetype") - ,("txt","text/plain") - ,("udeb","application/x-debian-package") - ,("uls","text/iuls") - ,("ustar","application/x-ustar") - ,("val","chemical/x-ncbi-asn1-binary") - ,("vcd","application/x-cdlink") - ,("vcf","text/x-vcard") - ,("vcs","text/x-vcalendar") - ,("vmd","chemical/x-vmd") - ,("vms","chemical/x-vamas-iso14976") - ,("vrm","x-world/x-vrml") - ,("vrml","model/vrml") - ,("vs","text/plain") - ,("vsd","application/vnd.visio") - ,("vtt","text/vtt") - ,("wad","application/x-doom") - ,("wav","audio/x-wav") - ,("wax","audio/x-ms-wax") - ,("wbmp","image/vnd.wap.wbmp") - ,("wbxml","application/vnd.wap.wbxml") - ,("webm","video/webm") - ,("wk","application/x-123") - ,("wm","video/x-ms-wm") - ,("wma","audio/x-ms-wma") - ,("wmd","application/x-ms-wmd") - ,("wmf","image/x-wmf") - ,("wml","text/vnd.wap.wml") - ,("wmlc","application/vnd.wap.wmlc") - ,("wmls","text/vnd.wap.wmlscript") - ,("wmlsc","application/vnd.wap.wmlscriptc") - ,("wmv","video/x-ms-wmv") - ,("wmx","video/x-ms-wmx") - ,("wmz","application/x-ms-wmz") - ,("woff","application/font-woff") - ,("woff2","font/woff2") - ,("wp5","application/wordperfect5.1") - ,("wpd","application/wordperfect") - ,("wrl","model/vrml") - ,("wsc","text/scriptlet") - ,("wvx","video/x-ms-wvx") - ,("wz","application/x-wingz") - ,("xbm","image/x-xbitmap") - ,("xcf","application/x-xcf") - ,("xht","application/xhtml+xml") - ,("xhtml","application/xhtml+xml") - ,("xlb","application/vnd.ms-excel") - ,("xls","application/vnd.ms-excel") - ,("xlt","application/vnd.ms-excel") - ,("xml","application/xml") - ,("xpi","application/x-xpinstall") - ,("xpm","image/x-xpixmap") - ,("xsl","application/xml") - ,("xtel","chemical/x-xtel") - ,("xul","application/vnd.mozilla.xul+xml") - ,("xwd","image/x-xwindowdump") - ,("xyz","chemical/x-xyz") - ,("zip","application/zip") - ,("zmt","chemical/x-mopac-input") - ] - diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs deleted file mode 100644 index fe99be5fe..000000000 --- a/src/Text/Pandoc/MediaBag.hs +++ /dev/null @@ -1,113 +0,0 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} -{- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.MediaBag - Copyright : Copyright (C) 2014 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definition of a MediaBag object to hold binary resources, and an -interface for interacting with it. --} -module Text.Pandoc.MediaBag ( - MediaBag, - lookupMedia, - insertMedia, - mediaDirectory, - extractMediaBag - ) where -import System.FilePath -import qualified System.FilePath.Posix as Posix -import System.Directory (createDirectoryIfMissing) -import qualified Data.Map as M -import qualified Data.ByteString.Lazy as BL -import Control.Monad (when) -import Control.Monad.Trans (MonadIO(..)) -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Maybe (fromMaybe) -import System.IO (stderr) -import Data.Data (Data) -import Data.Typeable (Typeable) - --- | A container for a collection of binary resources, with names and --- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' --- can be used for an empty 'MediaBag', and '<>' can be used to append --- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) - deriving (Monoid, Data, Typeable) - -instance Show MediaBag where - show bag = "MediaBag " ++ show (mediaDirectory bag) - --- | Insert a media item into a 'MediaBag', replacing any existing --- value with the same name. -insertMedia :: FilePath -- ^ relative path and canonical name of resource - -> Maybe MimeType -- ^ mime type (Nothing = determine from extension) - -> BL.ByteString -- ^ contents of resource - -> MediaBag - -> MediaBag -insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap) - where mime = fromMaybe fallback mbMime - fallback = case takeExtension fp of - ".gz" -> getMimeTypeDef $ dropExtension fp - _ -> getMimeTypeDef fp - --- | Lookup a media item in a 'MediaBag', returning mime type and contents. -lookupMedia :: FilePath - -> MediaBag - -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap - --- | Get a list of the file paths stored in a 'MediaBag', with --- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, MimeType, Int)] -mediaDirectory (MediaBag mediamap) = - M.foldWithKey (\fp (mime,contents) -> - (((Posix.joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap - --- | Extract contents of MediaBag to a given directory. Print informational --- messages if 'verbose' is true. --- TODO: eventually we may want to put this into PandocMonad --- In PandocPure, it could write to the fake file system... -extractMediaBag :: MonadIO m - => Bool - -> FilePath - -> MediaBag - -> m () -extractMediaBag verbose dir (MediaBag mediamap) = liftIO $ do - sequence_ $ M.foldWithKey - (\fp (_ ,contents) -> - ((writeMedia verbose dir (Posix.joinPath fp, contents)):)) [] mediamap - -writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () -writeMedia verbose dir (subpath, bs) = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / - let fullpath = dir </> normalise subpath - createDirectoryIfMissing True $ takeDirectory fullpath - when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath - BL.writeFile fullpath bs - - diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs deleted file mode 100644 index bc62f87d0..000000000 --- a/src/Text/Pandoc/Options.hs +++ /dev/null @@ -1,217 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} -{- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Data structures and functions for representing parser and writer -options. --} -module Text.Pandoc.Options ( module Text.Pandoc.Extensions - , ReaderOptions(..) - , HTMLMathMethod (..) - , CiteMethod (..) - , ObfuscationMethod (..) - , HTMLSlideVariant (..) - , EPUBVersion (..) - , WrapOption (..) - , TopLevelDivision (..) - , WriterOptions (..) - , TrackChanges (..) - , ReferenceLocation (..) - , def - , isEnabled - ) where -import Text.Pandoc.Extensions -import Data.Default -import Text.Pandoc.Highlighting (Style, pygments) -import Data.Data (Data) -import Data.Typeable (Typeable) -import GHC.Generics (Generic) - -data ReaderOptions = ReaderOptions{ - readerExtensions :: Extensions -- ^ Syntax extensions - , readerStandalone :: Bool -- ^ Standalone document with header - , readerColumns :: Int -- ^ Number of columns in terminal - , readerTabStop :: Int -- ^ Tab stop - , readerApplyMacros :: Bool -- ^ Apply macros to TeX math - , readerIndentedCodeClasses :: [String] -- ^ Default classes for - -- indented code blocks - , readerDefaultImageExtension :: String -- ^ Default extension for images - , readerTrackChanges :: TrackChanges -} deriving (Show, Read, Data, Typeable, Generic) - -instance Default ReaderOptions - where def = ReaderOptions{ - readerExtensions = emptyExtensions - , readerStandalone = False - , readerColumns = 80 - , readerTabStop = 4 - , readerApplyMacros = True - , readerIndentedCodeClasses = [] - , readerDefaultImageExtension = "" - , readerTrackChanges = AcceptChanges - } - --- --- Writer options --- - -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic) - -data HTMLMathMethod = PlainMath - | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js - | JsMath (Maybe String) -- url of jsMath load script - | GladTeX - | WebTeX String -- url of TeX->image script. - | MathML - | MathJax String -- url of MathJax.js - | KaTeX String String -- url of stylesheet and katex.js - deriving (Show, Read, Eq, Data, Typeable, Generic) - -data CiteMethod = Citeproc -- use citeproc to render them - | Natbib -- output natbib cite commands - | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Methods for obfuscating email addresses in HTML. -data ObfuscationMethod = NoObfuscation - | ReferenceObfuscation - | JavascriptObfuscation - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Varieties of HTML slide shows. -data HTMLSlideVariant = S5Slides - | SlidySlides - | SlideousSlides - | DZSlides - | RevealJsSlides - | NoSlides - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Options for accepting or rejecting MS Word track-changes. -data TrackChanges = AcceptChanges - | RejectChanges - | AllChanges - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Options for wrapping text in the output. -data WrapOption = WrapAuto -- ^ Automatically wrap to width - | WrapNone -- ^ No non-semantic newlines - | WrapPreserve -- ^ Preserve wrapping of input source - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Options defining the type of top-level headers. -data TopLevelDivision = TopLevelPart -- ^ Top-level headers become parts - | TopLevelChapter -- ^ Top-level headers become chapters - | TopLevelSection -- ^ Top-level headers become sections - | TopLevelDefault -- ^ Top-level type is determined via - -- heuristics - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Locations for footnotes and references in markdown output -data ReferenceLocation = EndOfBlock -- ^ End of block - | EndOfSection -- ^ prior to next section header (or end of document) - | EndOfDocument -- ^ at end of document - deriving (Show, Read, Eq, Data, Typeable, Generic) - --- | Options for writers -data WriterOptions = WriterOptions - { writerTemplate :: Maybe String -- ^ Template to use - , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerIncremental :: Bool -- ^ True if lists should be incremental - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerNumberOffset :: [Int] -- ^ Starting number for section, subsection, ... - , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerExtensions :: Extensions -- ^ Markdown extensions that can be used - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions - , writerWrapText :: WrapOption -- ^ Option for wrapping text - , writerColumns :: Int -- ^ Characters in a line (for text wrapping) - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML - -- and for footnote marks in markdown - , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file - , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory - , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML - , writerSlideLevel :: Maybe Int -- ^ Force header level of slides - , writerTopLevelDivision :: TopLevelDivision -- ^ Type of top-level divisions - , writerListings :: Bool -- ^ Use listings package for code - , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting - -- (Nothing = no highlighting) - , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB - , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line - , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed - , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) - , writerTOCDepth :: Int -- ^ Number of levels to include in TOC - , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified - , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine - , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown - } deriving (Show, Data, Typeable, Generic) - -instance Default WriterOptions where - def = WriterOptions { writerTemplate = Nothing - , writerVariables = [] - , writerTabStop = 4 - , writerTableOfContents = False - , writerIncremental = False - , writerHTMLMathMethod = PlainMath - , writerNumberSections = False - , writerNumberOffset = [0,0,0,0,0,0] - , writerSectionDivs = False - , writerExtensions = emptyExtensions - , writerReferenceLinks = False - , writerDpi = 96 - , writerWrapText = WrapAuto - , writerColumns = 72 - , writerEmailObfuscation = NoObfuscation - , writerIdentifierPrefix = "" - , writerSourceURL = Nothing - , writerUserDataDir = Nothing - , writerCiteMethod = Citeproc - , writerHtmlQTags = False - , writerSlideLevel = Nothing - , writerTopLevelDivision = TopLevelDefault - , writerListings = False - , writerHighlightStyle = Just pygments - , writerSetextHeaders = True - , writerEpubMetadata = Nothing - , writerEpubStylesheet = Nothing - , writerEpubFonts = [] - , writerEpubChapterLevel = 1 - , writerTOCDepth = 3 - , writerReferenceDoc = Nothing - , writerLaTeXArgs = [] - , writerReferenceLocation = EndOfDocument - } - --- | Returns True if the given extension is enabled. -isEnabled :: Extension -> WriterOptions -> Bool -isEnabled ext opts = ext `extensionEnabled` (writerExtensions opts) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs deleted file mode 100644 index 1b3b4eb88..000000000 --- a/src/Text/Pandoc/PDF.hs +++ /dev/null @@ -1,369 +0,0 @@ -{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-} -{- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of LaTeX documents to PDF. --} -module Text.Pandoc.PDF ( makePDF ) where - -import Data.ByteString.Lazy (ByteString) -import qualified Data.ByteString.Lazy as B -import qualified Data.ByteString.Lazy.Char8 as BC -import qualified Data.ByteString as BS -import Data.Monoid ((<>)) -import System.Exit (ExitCode (..)) -import System.FilePath -import System.IO (stdout) -import System.IO.Temp (withTempFile) -import System.Directory -import Data.Digest.Pure.SHA (showDigest, sha1) -import System.Environment -import Control.Monad (unless, when, (<=<)) -import qualified Control.Exception as E -import Data.List (isInfixOf) -import Data.Maybe (fromMaybe) -import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Definition -import Text.Pandoc.MediaBag -import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (warn, withTempDir, inDirectory, stringify) -import Text.Pandoc.Writers.Shared (getField, metaToJSON) -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..)) -import Text.Pandoc.Logging (Verbosity(..)) -import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) -import Text.Pandoc.Process (pipeProcess) -import Control.Monad.Trans (MonadIO(..)) -import qualified Data.ByteString.Lazy as BL -import qualified Codec.Picture as JP -#ifdef _WINDOWS -import Data.List (intercalate) -#endif -import Text.Pandoc.Class (PandocIO, runIOorExplode, fetchItem, setMediaBag, runIO) - -#ifdef _WINDOWS -changePathSeparators :: FilePath -> FilePath -changePathSeparators = intercalate "/" . splitDirectories -#endif - -makePDF :: MonadIO m - => String -- ^ pdf creator (pdflatex, lualatex, - -- xelatex, context, wkhtmltopdf) - -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer - -> WriterOptions -- ^ options - -> Verbosity -- ^ verbosity level - -> MediaBag -- ^ media - -> Pandoc -- ^ document - -> m (Either ByteString ByteString) -makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do - let mathArgs = case writerHTMLMathMethod opts of - -- with MathJax, wait til all math is rendered: - MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });", - "--window-status", "mathjax_loaded"] - _ -> [] - meta' <- metaToJSON opts (return . stringify) (return . stringify) meta - let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd - let args = mathArgs ++ - concatMap toArgs - [("page-size", getField "papersize" meta') - ,("title", getField "title" meta') - ,("margin-bottom", fromMaybe (Just "1.2in") - (getField "margin-bottom" meta')) - ,("margin-top", fromMaybe (Just "1.25in") - (getField "margin-top" meta')) - ,("margin-right", fromMaybe (Just "1.25in") - (getField "margin-right" meta')) - ,("margin-left", fromMaybe (Just "1.25in") - (getField "margin-left" meta')) - ] - source <- runIOorExplode $ writer opts doc - html2pdf verbosity args source -makePDF program writer opts verbosity mediabag doc = - liftIO $ withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages opts mediabag tmpdir doc - source <- runIOorExplode $ writer opts doc' - let args = writerLaTeXArgs opts - case takeBaseName program of - "context" -> context2pdf verbosity tmpdir source - prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] - -> tex2pdf' verbosity args tmpdir program source - _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program - -handleImages :: WriterOptions - -> MediaBag - -> FilePath -- ^ temp dir to store images - -> Pandoc -- ^ document - -> IO Pandoc -handleImages opts mediabag tmpdir = - walkM (convertImages tmpdir) <=< walkM (handleImage' opts mediabag tmpdir) - -handleImage' :: WriterOptions - -> MediaBag - -> FilePath - -> Inline - -> IO Inline -handleImage' opts mediabag tmpdir (Image attr ils (src,tit)) = do - exists <- doesFileExist src - if exists - then return $ Image attr ils (src,tit) - else do - res <- runIO $ do - setMediaBag mediabag - fetchItem (writerSourceURL opts) src - case res of - Right (contents, Just mime) -> do - let ext = fromMaybe (takeExtension src) $ - extensionFromMimeType mime - let basename = showDigest $ sha1 $ BL.fromChunks [contents] - let fname = tmpdir </> basename <.> ext - BS.writeFile fname contents - return $ Image attr ils (fname,tit) - _ -> do - warn $ "Could not find image `" ++ src ++ "', skipping..." - -- return alt text - return $ Emph ils -handleImage' _ _ _ x = return x - -convertImages :: FilePath -> Inline -> IO Inline -convertImages tmpdir (Image attr ils (src, tit)) = do - img <- convertImage tmpdir src - newPath <- - case img of - Left e -> src <$ warn e - Right fp -> return fp - return (Image attr ils (newPath, tit)) -convertImages _ x = return x - --- Convert formats which do not work well in pdf to png -convertImage :: FilePath -> FilePath -> IO (Either String FilePath) -convertImage tmpdir fname = - case mime of - Just "image/png" -> doNothing - Just "image/jpeg" -> doNothing - Just "application/pdf" -> doNothing - _ -> JP.readImage fname >>= \res -> - case res of - Left _ -> return $ Left $ "Unable to convert `" ++ - fname ++ "' for use with pdflatex." - Right img -> - E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ - \(e :: E.SomeException) -> return (Left (show e)) - where - fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir - mime = getMimeType fname - doNothing = return (Right fname) - -tex2pdf' :: Verbosity -- ^ Verbosity level - -> [String] -- ^ Arguments to the latex-engine - -> FilePath -- ^ temp directory for output - -> String -- ^ tex program - -> String -- ^ tex source - -> IO (Either ByteString ByteString) -tex2pdf' verbosity args tmpDir program source = do - let numruns = if "\\tableofcontents" `isInfixOf` source - then 3 -- to get page numbers - else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source - case (exit, mbPdf) of - (ExitFailure _, _) -> do - let logmsg = extractMsg log' - let extramsg = - case logmsg of - x | "! Package inputenc Error" `BC.isPrefixOf` x - && program /= "xelatex" - -> "\nTry running pandoc with --latex-engine=xelatex." - _ -> "" - return $ Left $ logmsg <> extramsg - (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf - --- parsing output - -extractMsg :: ByteString -> ByteString -extractMsg log' = do - let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log' - let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg' - let lineno = take 1 rest - if null msg' - then log' - else BC.unlines (msg'' ++ lineno) - -extractConTeXtMsg :: ByteString -> ByteString -extractConTeXtMsg log' = do - let msg' = take 1 $ - dropWhile (not . ("tex error" `BC.isPrefixOf`)) $ BC.lines log' - if null msg' - then log' - else BC.unlines msg' - --- running tex programs - --- Run a TeX program on an input bytestring and return (exit code, --- contents of stdout, contents of produced PDF if any). Rerun --- a fixed number of times to resolve references. -runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath - -> String -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbosity program args runNumber numRuns tmpDir source = do - let file = tmpDir </> "input.tex" - exists <- doesFileExist file - unless exists $ UTF8.writeFile file source -#ifdef _WINDOWS - -- note: we want / even on Windows, for TexLive - let tmpDir' = changePathSeparators tmpDir - let file' = changePathSeparators file -#else - let tmpDir' = tmpDir - let file' = file -#endif - let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir'] ++ args ++ [file'] - env' <- getEnvironment - let sep = [searchPathSeparator] - let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) - $ lookup "TEXINPUTS" env' - let env'' = ("TEXINPUTS", texinputs) : - [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when (verbosity >= INFO && runNumber == 1) $ do - putStrLn "[makePDF] temp dir:" - putStrLn tmpDir' - putStrLn "[makePDF] Command line:" - putStrLn $ program ++ " " ++ unwords (map show programArgs) - putStr "\n" - putStrLn "[makePDF] Environment:" - mapM_ print env'' - putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file' ++ ":" - B.readFile file' >>= B.putStr - putStr "\n" - (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty - when (verbosity >= INFO) $ do - putStrLn $ "[makePDF] Run #" ++ show runNumber - B.hPutStr stdout out - putStr "\n" - if runNumber <= numRuns - then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source - else do - let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir - pdfExists <- doesFileExist pdfFile - pdf <- if pdfExists - -- We read PDF as a strict bytestring to make sure that the - -- temp directory is removed on Windows. - -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile - else return Nothing - return (exit, out, pdf) - -html2pdf :: Verbosity -- ^ Verbosity level - -> [String] -- ^ Args to wkhtmltopdf - -> String -- ^ HTML5 source - -> IO (Either ByteString ByteString) -html2pdf verbosity args source = do - file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp - pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp - UTF8.writeFile file source - let programArgs = args ++ [file, pdfFile] - env' <- getEnvironment - when (verbosity >= INFO) $ do - putStrLn "[makePDF] Command line:" - putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) - putStr "\n" - putStrLn "[makePDF] Environment:" - mapM_ print env' - putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr - putStr "\n" - (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty - removeFile file - when (verbosity >= INFO) $ do - B.hPutStr stdout out - putStr "\n" - pdfExists <- doesFileExist pdfFile - mbPdf <- if pdfExists - -- We read PDF as a strict bytestring to make sure that the - -- temp directory is removed on Windows. - -- See https://github.com/jgm/pandoc/issues/1192. - then do - res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile - removeFile pdfFile - return res - else return Nothing - return $ case (exit, mbPdf) of - (ExitFailure _, _) -> Left out - (ExitSuccess, Nothing) -> Left "" - (ExitSuccess, Just pdf) -> Right pdf - -context2pdf :: Verbosity -- ^ Verbosity level - -> FilePath -- ^ temp directory for output - -> String -- ^ ConTeXt source - -> IO (Either ByteString ByteString) -context2pdf verbosity tmpDir source = inDirectory tmpDir $ do - let file = "input.tex" - UTF8.writeFile file source -#ifdef _WINDOWS - -- note: we want / even on Windows, for TexLive - let tmpDir' = changePathSeparators tmpDir -#else - let tmpDir' = tmpDir -#endif - let programArgs = "--batchmode" : [file] - env' <- getEnvironment - let sep = [searchPathSeparator] - let texinputs = maybe (".." ++ sep) ((".." ++ sep) ++) - $ lookup "TEXINPUTS" env' - let env'' = ("TEXINPUTS", texinputs) : - [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - when (verbosity >= INFO) $ do - putStrLn "[makePDF] temp dir:" - putStrLn tmpDir' - putStrLn "[makePDF] Command line:" - putStrLn $ "context" ++ " " ++ unwords (map show programArgs) - putStr "\n" - putStrLn "[makePDF] Environment:" - mapM_ print env'' - putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - B.readFile file >>= B.putStr - putStr "\n" - (exit, out) <- pipeProcess (Just env'') "context" programArgs BL.empty - when (verbosity >= INFO) $ do - B.hPutStr stdout out - putStr "\n" - let pdfFile = replaceExtension file ".pdf" - pdfExists <- doesFileExist pdfFile - mbPdf <- if pdfExists - -- We read PDF as a strict bytestring to make sure that the - -- temp directory is removed on Windows. - -- See https://github.com/jgm/pandoc/issues/1192. - then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile - else return Nothing - case (exit, mbPdf) of - (ExitFailure _, _) -> do - let logmsg = extractConTeXtMsg out - return $ Left logmsg - (ExitSuccess, Nothing) -> return $ Left "" - (ExitSuccess, Just pdf) -> return $ Right pdf - diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs deleted file mode 100644 index 400d07f2a..000000000 --- a/src/Text/Pandoc/Parsing.hs +++ /dev/null @@ -1,1329 +0,0 @@ -{-# LANGUAGE - FlexibleContexts -, GeneralizedNewtypeDeriving -, TypeSynonymInstances -, MultiParamTypeClasses -, FlexibleInstances -, IncoherentInstances #-} - -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -A utility library with parsers used in pandoc readers. --} -module Text.Pandoc.Parsing ( anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - oneOfStringsCI, - spaceChar, - nonspaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - romanNumeral, - emailAddress, - uri, - mathInline, - mathDisplay, - withHorizDisplacement, - withRaw, - escaped, - characterReference, - anyOrderedListMarker, - orderedListMarker, - charRef, - lineBlockLines, - tableWith, - widthsFromIndices, - gridTableWith, - readWith, - readWithM, - testStringWith, - guardEnabled, - guardDisabled, - updateLastStrPos, - notAfterString, - logMessage, - reportLogMessages, - ParserState (..), - HasReaderOptions (..), - HasHeaderMap (..), - HasIdentifierList (..), - HasMacros (..), - HasLogMessages (..), - HasLastStrPosition (..), - defaultParserState, - HeaderType (..), - ParserContext (..), - QuoteContext (..), - HasQuoteContext (..), - NoteTable, - NoteTable', - KeyTable, - SubstTable, - Key (..), - toKey, - registerHeader, - smartPunctuation, - singleQuoteStart, - singleQuoteEnd, - doubleQuoteStart, - doubleQuoteEnd, - ellipses, - apostrophe, - dash, - nested, - citeKey, - macro, - applyMacros', - Parser, - ParserT, - F(..), - runF, - askF, - asksF, - token, - (<+?>), - extractIdClass, - insertIncludedFile, - -- * Re-exports from Text.Pandoc.Parsec - Stream, - runParser, - runParserT, - parse, - anyToken, - getInput, - setInput, - unexpected, - char, - letter, - digit, - alphaNum, - skipMany, - skipMany1, - spaces, - space, - anyChar, - satisfy, - newline, - string, - count, - eof, - noneOf, - oneOf, - lookAhead, - notFollowedBy, - many, - many1, - manyTill, - (<|>), - (<?>), - choice, - try, - sepBy, - sepBy1, - sepEndBy, - sepEndBy1, - endBy, - endBy1, - option, - optional, - optionMaybe, - getState, - setState, - updateState, - SourcePos, - getPosition, - setPosition, - sourceColumn, - sourceLine, - setSourceColumn, - setSourceLine, - newPos - ) -where - -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..)) -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.XML (fromEntities) -import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.Parsec hiding (token) -import Text.Parsec.Pos (newPos) -import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, - isHexDigit, isSpace, isPunctuation ) -import Data.List ( intercalate, transpose, isSuffixOf ) -import Text.Pandoc.Shared -import qualified Data.Map as M -import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, - parseMacroDefinitions) -import Text.HTML.TagSoup.Entity ( lookupEntity ) -import Text.Pandoc.Asciify (toAsciiChar) -import Data.Monoid ((<>)) -import Text.Pandoc.Class (PandocMonad, readFileFromDirs, report) -import Text.Pandoc.Logging -import Data.Default -import qualified Data.Set as Set -import Control.Monad.Reader -import Control.Monad.Identity -import Data.Maybe (catMaybes) - -import Text.Pandoc.Error -import Control.Monad.Except - -type Parser t s = Parsec t s - -type ParserT = ParsecT - -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) - -runF :: F a -> ParserState -> a -runF = runReader . unF - -askF :: F ParserState -askF = F ask - -asksF :: (ParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = liftM mconcat . sequence - --- | Parse any line of text -anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] -anyLine = do - -- This is much faster than: - -- manyTill anyChar newline - inp <- getInput - pos <- getPosition - case break (=='\n') inp of - (this, '\n':rest) -> do - -- needed to persuade parsec that this won't match an empty string: - anyChar - setInput rest - setPosition $ incSourceLine (setSourceColumn pos 1) 1 - return this - _ -> mzero - --- | Like @manyTill@, but reads at least one item. -many1Till :: Stream s m t - => ParserT s st m a - -> ParserT s st m end - -> ParserT s st m [a] -many1Till p end = do - first <- p - rest <- manyTill p end - return (first:rest) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m () -notFollowedBy' p = try $ join $ do a <- try p - return (unexpected (show a)) - <|> - return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - -oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String -oneOfStrings' _ [] = fail "no strings" -oneOfStrings' matches strs = try $ do - c <- anyChar - let strs' = [xs | (x:xs) <- strs, x `matches` c] - case strs' of - [] -> fail "not found" - _ -> (c:) <$> oneOfStrings' matches strs' - <|> if "" `elem` strs' - then return [c] - else fail "not found" - --- | Parses one of a list of strings. If the list contains --- two strings one of which is a prefix of the other, the longer --- string will be matched if possible. -oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String -oneOfStrings = oneOfStrings' (==) - --- | Parses one of a list of strings (tried in order), case insensitive. -oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String -oneOfStringsCI = oneOfStrings' ciMatch - where ciMatch x y = toLower' x == toLower' y - -- this optimizes toLower by checking common ASCII case - -- first, before calling the expensive unicode-aware - -- function: - toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32) - | isAscii c = c - | otherwise = toLower c - --- | Parses a space or tab. -spaceChar :: Stream s m Char => ParserT s st m Char -spaceChar = satisfy $ \c -> c == ' ' || c == '\t' - --- | Parses a nonspace, nonnewline character. -nonspaceChar :: Stream s m Char => ParserT s st m Char -nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] - --- | Skips zero or more spaces or tabs. -skipSpaces :: Stream s m Char => ParserT s st m () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: Stream s m Char => ParserT s st m Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Stream s m Char => ParserT s st m [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser - -> ParserT s st m end -- ^ end parser - -> ParserT s st m a -- ^ content parser (to be used repeatedly) - -> ParserT s st m [a] -enclosed start end parser = try $ - start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do - firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs - return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a -parseFromString parser str = do - oldPos <- getPosition - oldInput <- getInput - setInput str - result <- parser - spaces - eof - setInput oldInput - setPosition oldPos - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: Stream [Char] m Char => ParserT [Char] st m String -lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" --- and return "hello (there)". -charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char - -> ParserT s st m String -charsInBalanced open close parser = try $ do - char open - let isDelim c = c == open || c == close - raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser) - <|> (do res <- charsInBalanced open close parser - return $ [open] ++ res ++ [close]) - char close - return $ concat raw - --- old charsInBalanced would be: --- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline) --- old charsInBalanced' would be: --- charsInBalanced open close anyChar - --- Auxiliary functions for romanNumeral: - -lowercaseRomanDigits :: [Char] -lowercaseRomanDigits = ['i','v','x','l','c','d','m'] - -uppercaseRomanDigits :: [Char] -uppercaseRomanDigits = map toUpper lowercaseRomanDigits - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true - -> ParserT s st m Int -romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits - else lowercaseRomanDigits - lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = - map char romanDigits - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- Parsers for email addresses and URIs - --- | Parses an email address; returns original and corresponding --- escaped mailto: URI. -emailAddress :: Stream s m Char => ParserT s st m (String, String) -emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) - where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom - in (full, escapeURI $ "mailto:" ++ full) - mailbox = intercalate "." <$> (emailWord `sepby1` dot) - domain = intercalate "." <$> (subdomain `sepby1` dot) - dot = char '.' - subdomain = many1 $ alphaNum <|> innerPunct - -- this excludes some valid email addresses, since an - -- email could contain e.g. '__', but gives better results - -- for our purposes, when combined with markdown parsing: - innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') - <* notFollowedBy space - <* notFollowedBy (satisfy isPunctuation)) - -- technically an email address could begin with a symbol, - -- but allowing this creates too many problems. - -- See e.g. https://github.com/jgm/pandoc/issues/2940 - emailWord = do x <- satisfy isAlphaNum - xs <- many (satisfy isEmailChar) - return (x:xs) - isEmailChar c = isAlphaNum c || isEmailPunct c - isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;" - -- note: sepBy1 from parsec consumes input when sep - -- succeeds and p fails, so we use this variant here. - sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p)) - - --- Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes coap, doi, javascript, isbn, pmid -schemes :: [String] -schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", - "crid","data","dav","dict","dns","file","ftp","geo","go","gopher", - "h323","http","https","iax","icap","im","imap","info","ipp","iris", - "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid", - "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp", - "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve", - "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet", - "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon", - "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s", - "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin", - "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee", - "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb", - "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject", - "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms", - "keyparc","lastfm","ldaps","magnet","maps","market","message","mms", - "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi", - "platform","proxy","psyc","query","res","resource","rmi","rsync", - "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify", - "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004", - "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", - "ymsgr", "isbn", "pmid"] - -uriScheme :: Stream s m Char => ParserT s st m String -uriScheme = oneOfStringsCI schemes - --- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) -uri = try $ do - scheme <- uriScheme - char ':' - -- We allow sentence punctuation except at the end, since - -- we don't want the trailing '.' in 'http://google.com.' We want to allow - -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) - -- as a URL, while NOT picking up the closing paren in - -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" - let wordChar = satisfy isWordChar - let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) - let entity = () <$ characterReference - let punct = skipMany1 (char ',') - <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) - let uriChunk = skipMany1 wordChar - <|> percentEscaped - <|> entity - <|> (try $ punct >> - lookAhead (void (satisfy isWordChar) <|> percentEscaped)) - str <- snd <$> withRaw (skipMany1 ( () <$ - (enclosed (char '(') (char ')') uriChunk - <|> enclosed (char '{') (char '}') uriChunk - <|> enclosed (char '[') (char ']') uriChunk) - <|> uriChunk)) - str' <- option str $ char '/' >> return (str ++ "/") - let uri' = scheme ++ ":" ++ fromEntities str' - return (uri', escapeURI uri') - -mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf " \t\n\\") - <|> (char '\\' >> - -- This next clause is needed because \text{..} can - -- contain $, \(\), etc. - (try (string "text" >> - (("\\text" ++) <$> inBalancedBraces 0 "")) - <|> (\c -> ['\\',c]) <$> anyChar)) - <|> do (blankline <* notFollowedBy' blankline) <|> - (oneOf " \t" <* skipMany (oneOf " \t")) - notFollowedBy (char '$') - return " " - ) (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - where - inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String - inBalancedBraces 0 "" = do - c <- anyChar - if c == '{' - then inBalancedBraces 1 "{" - else mzero - inBalancedBraces 0 s = return $ reverse s - inBalancedBraces numOpen ('\\':xs) = do - c <- anyChar - inBalancedBraces numOpen (c:'\\':xs) - inBalancedBraces numOpen xs = do - c <- anyChar - case c of - '}' -> inBalancedBraces (numOpen - 1) (c:xs) - '{' -> inBalancedBraces (numOpen + 1) (c:xs) - _ -> inBalancedBraces numOpen (c:xs) - -mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl) - -mathDisplay :: (HasReaderOptions st, Stream s m Char) - => ParserT s st m String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathInline :: (HasReaderOptions st , Stream s m Char) - => ParserT s st m String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: Stream s m Char - => ParserT s st m a -- ^ Parser to apply - -> ParserT s st m (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Applies a parser and returns the raw string that was parsed, --- along with the value produced by the parser. -withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) -withRaw parser = do - pos1 <- getPosition - inp <- getInput - result <- parser - pos2 <- getPosition - let (l1,c1) = (sourceLine pos1, sourceColumn pos1) - let (l2,c2) = (sourceLine pos2, sourceColumn pos2) - let inplines = take ((l2 - l1) + 1) $ lines inp - let raw = case inplines of - [] -> "" - [l] -> take (c2 - c1) l - ls -> unlines (init ls) ++ take (c2 - 1) (last ls) - return (result, raw) - --- | Parses backslash, then applies character parser. -escaped :: Stream s m Char - => ParserT s st m Char -- ^ Parser for character to escape - -> ParserT s st m Char -escaped parser = try $ char '\\' >> parser - --- | Parse character entity. -characterReference :: Stream s m Char => ParserT s st m Char -characterReference = try $ do - char '&' - ent <- many1Till nonspaceChar (char ';') - let ent' = case ent of - '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" - case lookupEntity ent' of - Just (c : _) -> return c - _ -> fail "entity not found" - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -decimal = do - num <- many1 digit - return (Decimal, read num) - --- | Parses a '@' and optional label and --- returns (DefaultStyle, [next example number]). The next --- example number is incremented in parser state, and the label --- (if present) is added to the label table. -exampleNum :: Stream s m Char - => ParserT s ParserState m (ListNumberStyle, Int) -exampleNum = do - char '@' - lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) - st <- getState - let num = stateNextExample st - let newlabels = if null lab - then stateExamples st - else M.insert lab num $ stateExamples st - updateState $ \s -> s{ stateNextExample = num + 1 - , stateExamples = newlabels } - return (Example, num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -defaultNum = do - char '#' - return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> - (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes -anyOrderedListMarker = choice $ - [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, exampleNum, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: Stream s m Char - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes -inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) - --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: Stream s m Char - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes -inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: Stream s m Char - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes -inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: Stream s m Char - => ListNumberStyle - -> ListNumberDelim - -> ParserT s ParserState m Int -orderedListMarker style delim = do - let num = defaultNum <|> -- # can continue any kind of list - case style of - DefaultStyle -> decimal - Example -> exampleNum - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, _, _) <- context num - return start - --- | Parses a character reference and returns a Str element. -charRef :: Stream s m Char => ParserT s st m Inline -charRef = do - c <- characterReference - return $ Str [c] - -lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String -lineBlockLine = try $ do - char '|' - char ' ' - white <- many (spaceChar >> return '\160') - notFollowedBy newline - line <- anyLine - continuations <- many (try $ char ' ' >> anyLine) - return $ white ++ unwords (line : continuations) - -blankLineBlockLine :: Stream [Char] m Char => ParserT [Char] st m Char -blankLineBlockLine = try (char '|' >> blankline) - --- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String] -lineBlockLines = try $ do - lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine)) - skipMany1 $ blankline <|> blankLineBlockLine - return lines' - --- | Parse a table using 'headerParser', 'rowParser', --- 'lineParser', and 'footerParser'. -tableWith :: Stream s m Char - => ParserT s ParserState m ([Blocks], [Alignment], [Int]) - -> ([Int] -> ParserT s ParserState m [Blocks]) - -> ParserT s ParserState m sep - -> ParserT s ParserState m end - -> ParserT s ParserState m Blocks -tableWith headerParser rowParser lineParser footerParser = try $ do - (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy1` lineParser - footerParser - numColumns <- getOption readerColumns - let widths = if (indices == []) - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return $ B.table mempty (zip aligns widths) heads lines' - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns' indices = - let numColumns = max numColumns' (if null indices then 0 else last indices) - lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - ---- - --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -gridTableWith :: Stream [Char] m Char - => ParserT [Char] ParserState m Blocks -- ^ Block list parser - -> Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Blocks -gridTableWith blocks headless = - tableWith (gridTableHeader headless blocks) (gridTableRow blocks) - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int) -gridPart ch = do - dashes <- many1 (char ch) - char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: Stream [Char] m Char - => Bool -- ^ Headerless table - -> ParserT [Char] ParserState m Blocks - -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int]) -gridTableHeader headless blocks = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> - many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault - -- RST does not have a notion of alignments - let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString blocks) $ map trim rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: Stream [Char] m Char - => ParserT [Char] ParserState m Blocks - -> [Int] - -> ParserT [Char] ParserState m [Blocks] -gridTableRow blocks indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - mapM (liftM compactifyCell . parseFromString blocks) cols - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - -compactifyCell :: Blocks -> Blocks -compactifyCell bs = head $ compactify [bs] - --- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char] -gridTableFooter = blanklines - ---- - --- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Monad m) - => ParserT [Char] st m a -- ^ parser - -> st -- ^ initial state - -> String -- ^ input - -> m (Either PandocError a) -readWithM parser state input = - mapLeft (PandocParsecError input) `liftM` runParserT parser state "source" input - - --- | Parse a string with a given parser and state -readWith :: Parser [Char] st a - -> st - -> String - -> Either PandocError a -readWith p t inp = runIdentity $ readWithM p t inp - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) - => ParserT [Char] ParserState Identity a - -> [Char] - -> IO () -testStringWith parser str = UTF8.putStrLn $ show $ - readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState - { stateOptions :: ReaderOptions, -- ^ User options - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateAllowLinks :: Bool, -- ^ Allow parsing of links - stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph - stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys - stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys - stateSubstitutions :: SubstTable, -- ^ List of substitution references - stateNotes :: NoteTable, -- ^ List of notes (raw bodies) - stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) - stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata - stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) - stateIdentifiers :: Set.Set String, -- ^ Header identifiers used - stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateHasChapters :: Bool, -- ^ True if \chapter encountered - stateMacros :: [Macro], -- ^ List of macros defined so far - stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles - -- Triple represents: 1) Base role, 2) Optional format (only for :raw: - -- roles), 3) Additional classes (rest of Attr is unused)). - stateCaption :: Maybe Inlines, -- ^ Caption in current environment - stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed - stateContainers :: [String], -- ^ parent include files - stateLogMessages :: [LogMessage], -- ^ log messages - stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context - } - -instance Default ParserState where - def = defaultParserState - -instance HasMeta ParserState where - setMeta field val st = - st{ stateMeta = setMeta field val $ stateMeta st } - deleteMeta field st = - st{ stateMeta = deleteMeta field $ stateMeta st } - -class HasReaderOptions st where - extractReaderOptions :: st -> ReaderOptions - getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b - -- default - getOption f = (f . extractReaderOptions) <$> getState - -class HasQuoteContext st m where - getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext - withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a - -instance Monad m => HasQuoteContext ParserState m where - getQuoteContext = stateQuoteContext <$> getState - withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -instance HasReaderOptions ParserState where - extractReaderOptions = stateOptions - -class HasHeaderMap st where - extractHeaderMap :: st -> M.Map Inlines String - updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> - st -> st - -instance HasHeaderMap ParserState where - extractHeaderMap = stateHeaders - updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st } - -class HasIdentifierList st where - extractIdentifierList :: st -> Set.Set String - updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st - -instance HasIdentifierList ParserState where - extractIdentifierList = stateIdentifiers - updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } - -class HasMacros st where - extractMacros :: st -> [Macro] - updateMacros :: ([Macro] -> [Macro]) -> st -> st - -instance HasMacros ParserState where - extractMacros = stateMacros - updateMacros f st = st{ stateMacros = f $ stateMacros st } - -class HasLastStrPosition st where - setLastStrPos :: SourcePos -> st -> st - getLastStrPos :: st -> Maybe SourcePos - -instance HasLastStrPosition ParserState where - setLastStrPos pos st = st{ stateLastStrPos = Just pos } - getLastStrPos st = stateLastStrPos st - -class HasLogMessages st where - addLogMessage :: LogMessage -> st -> st - getLogMessages :: st -> [LogMessage] - -instance HasLogMessages ParserState where - addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st } - getLogMessages st = reverse $ stateLogMessages st - -defaultParserState :: ParserState -defaultParserState = - ParserState { stateOptions = def, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateAllowLinks = True, - stateMaxNestingLevel = 6, - stateLastStrPos = Nothing, - stateKeys = M.empty, - stateHeaderKeys = M.empty, - stateSubstitutions = M.empty, - stateNotes = [], - stateNotes' = [], - stateMeta = nullMeta, - stateMeta' = return nullMeta, - stateHeaderTable = [], - stateHeaders = M.empty, - stateIdentifiers = Set.empty, - stateNextExample = 1, - stateExamples = M.empty, - stateHasChapters = False, - stateMacros = [], - stateRstDefaultRole = "title-reference", - stateRstCustomRoles = M.empty, - stateCaption = Nothing, - stateInHtmlBlock = Nothing, - stateContainers = [], - stateLogMessages = [], - stateMarkdownAttribute = False - } - --- | Add a log message. -logMessage :: (Stream s m a, HasLogMessages st) - => LogMessage -> ParserT s st m () -logMessage msg = updateState (addLogMessage msg) - --- | Report all the accumulated log messages, according to verbosity level. -reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m () -reportLogMessages = do - msgs <- getLogMessages <$> getState - mapM_ report msgs - --- | Succeed only if the extension is enabled. -guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext - --- | Succeed only if the extension is disabled. -guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext - --- | Update the position on which the last string ended. -updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m () -updateLastStrPos = getPosition >>= updateState . setLastStrPos - --- | Whether we are right after the end of a string. -notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool -notAfterString = do - pos <- getPosition - st <- getState - return $ getLastStrPos st /= Just pos - -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) - -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) - -data QuoteContext - = InSingleQuote -- ^ Used when parsing inside single quotes - | InDoubleQuote -- ^ Used when parsing inside double quotes - | NoQuote -- ^ Used when not parsing inside quotes - deriving (Eq, Show) - -type NoteTable = [(String, String)] - -type NoteTable' = [(String, F Blocks)] -- used in markdown reader - -newtype Key = Key String deriving (Show, Read, Eq, Ord) - -toKey :: String -> Key -toKey = Key . map toLower . unwords . words . unbracket - where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs - unbracket xs = xs - -type KeyTable = M.Map Key (Target, Attr) - -type SubstTable = M.Map Key Inlines - --- | Add header to the list of headers in state, together --- with its associated identifier. If the identifier is null --- and the auto_identifers extension is set, generate a new --- unique identifier, and update the list of identifiers --- in state. -registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) - => Attr -> Inlines -> ParserT s st m Attr -registerHeader (ident,classes,kvs) header' = do - ids <- extractIdentifierList <$> getState - exts <- getOption readerExtensions - let insert' = M.insertWith (\_new old -> old) - if null ident && Ext_auto_identifiers `extensionEnabled` exts - then do - let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `extensionEnabled` exts - then catMaybes $ map toAsciiChar id' - else id' - updateState $ updateIdentifierList $ Set.insert id' - updateState $ updateIdentifierList $ Set.insert id'' - updateState $ updateHeaderMap $ insert' header' id' - return (id'',classes,kvs) - else do - unless (null ident) $ - updateState $ updateHeaderMap $ insert' header' ident - return (ident,classes,kvs) - -smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines -smartPunctuation inlineParser = do - guardEnabled Ext_smart - choice [ quoted inlineParser, apostrophe, dash, ellipses ] - -apostrophe :: Stream s m Char => ParserT s st m Inlines -apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") - -quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines -quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser - -singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines -singleQuoted inlineParser = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= - return . B.singleQuoted . mconcat - -doubleQuoted :: (HasQuoteContext st m, Stream s m Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines -doubleQuoted inlineParser = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>= - return . B.doubleQuoted . mconcat - -failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) - => QuoteContext - -> ParserT s st m () -failIfInQuoteContext context = do - context' <- getQuoteContext - if context' == context - then fail "already inside quotes" - else return () - -charOrRef :: Stream s m Char => String -> ParserT s st m Char -charOrRef cs = - oneOf cs <|> try (do c <- characterReference - guard (c `elem` cs) - return c) - -singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) - => ParserT s st m () -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - -- single quote start can't be right after str - guard =<< notAfterString - () <$ charOrRef "'\8216\145" - -singleQuoteEnd :: Stream s m Char - => ParserT s st m () -singleQuoteEnd = try $ do - charOrRef "'\8217\146" - notFollowedBy alphaNum - -doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char) - => ParserT s st m () -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - try $ do charOrRef "\"\8220\147" - notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] - -doubleQuoteEnd :: Stream s m Char - => ParserT s st m () -doubleQuoteEnd = void (charOrRef "\"\8221\148") - -ellipses :: Stream s m Char - => ParserT s st m Inlines -ellipses = try (string "..." >> return (B.str "\8230")) - -dash :: (HasReaderOptions st, Stream s m Char) - => ParserT s st m Inlines -dash = try $ do - oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions - if oldDashes - then do - char '-' - (char '-' >> return (B.str "\8212")) - <|> (lookAhead digit >> return (B.str "\8211")) - else do - string "--" - (char '-' >> return (B.str "\8212")) - <|> return (B.str "\8211") - --- This is used to prevent exponential blowups for things like: --- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: Stream s m a - => ParserT s ParserState m a - -> ParserT s ParserState m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - -citeKey :: (Stream s m Char, HasLastStrPosition st) - => ParserT s st m (Bool, String) -citeKey = try $ do - guard =<< notAfterString - suppress_author <- option False (char '-' *> return True) - char '@' - firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite - let regchar = satisfy (\c -> isAlphaNum c || c == '_') - let internal p = try $ p <* lookAhead regchar - rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> - try (oneOf ":/" <* lookAhead (char '/')) - let key = firstChar:rest - return (suppress_author, key) - - -token :: (Stream s m t) - => (t -> String) - -> (t -> SourcePos) - -> (t -> Maybe a) - -> ParsecT s st m a -token pp pos match = tokenPrim pp (\_ t _ -> pos t) match - --- --- Macros --- - --- | Parse a \newcommand or \renewcommand macro definition. -macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st) - => ParserT [Char] st m Blocks -macro = do - apply <- getOption readerApplyMacros - inp <- getInput - case parseMacroDefinitions inp of - ([], _) -> mzero - (ms, rest) -> do def' <- count (length inp - length rest) anyChar - if apply - then do - updateState $ \st -> - updateMacros (ms ++) st - return mempty - else return $ rawBlock "latex" def' - --- | Apply current macros to string. -applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) - => String - -> ParserT [Char] st m String -applyMacros' target = do - apply <- getOption readerApplyMacros - if apply - then do macros <- extractMacros <$> getState - return $ applyMacros macros target - else return target - -infixr 5 <+?> -(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a -a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) - -extractIdClass :: Attr -> Attr -extractIdClass (ident, cls, kvs) = (ident', cls', kvs') - where - ident' = case (lookup "id" kvs) of - Just v -> v - Nothing -> ident - cls' = case (lookup "class" kvs) of - Just cl -> words cl - Nothing -> cls - kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs - -insertIncludedFile :: PandocMonad m - => ParserT String ParserState m Blocks - -> [FilePath] -> FilePath - -> ParserT String ParserState m Blocks -insertIncludedFile blocks dirs f = do - oldPos <- getPosition - oldInput <- getInput - containers <- stateContainers <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos - updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbcontents <- readFileFromDirs dirs f - contents <- case mbcontents of - Just s -> return s - Nothing -> do - report $ CouldNotLoadIncludeFile f oldPos - return "" - setPosition $ newPos f 1 1 - setInput contents - bs <- blocks - setInput oldInput - setPosition oldPos - updateState $ \s -> s{ stateContainers = tail $ stateContainers s } - return bs diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs deleted file mode 100644 index 256f38b0c..000000000 --- a/src/Text/Pandoc/Pretty.hs +++ /dev/null @@ -1,557 +0,0 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} -{- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA --} - -{- | - Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -A prettyprinting library for the production of text documents, -including wrapped text, indentated blocks, and tables. --} - -module Text.Pandoc.Pretty ( - Doc - , render - , cr - , blankline - , blanklines - , space - , text - , char - , prefixed - , flush - , nest - , hang - , beforeNonBlank - , nowrap - , afterBreak - , offset - , minOffset - , height - , lblock - , cblock - , rblock - , (<>) - , (<+>) - , ($$) - , ($+$) - , isEmpty - , empty - , cat - , hcat - , hsep - , vcat - , vsep - , nestle - , chomp - , inside - , braces - , brackets - , parens - , quotes - , doubleQuotes - , charWidth - , realLength - ) - -where -import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) -import qualified Data.Sequence as Seq -import Data.Foldable (toList) -import Data.List (intersperse) -import Data.String -import Control.Monad.State -import Data.Char (isSpace) -import Data.Monoid ((<>)) - -data RenderState a = RenderState{ - output :: [a] -- ^ In reverse order - , prefix :: String - , usePrefix :: Bool - , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping - , column :: Int - , newlines :: Int -- ^ Number of preceding newlines - } - -type DocState a = State (RenderState a) () - -data D = Text Int String - | Block Int [String] - | Prefixed String Doc - | BeforeNonBlank Doc - | Flush Doc - | BreakingSpace - | AfterBreak String - | CarriageReturn - | NewLine - | BlankLines Int -- number of blank lines - deriving (Show) - -newtype Doc = Doc { unDoc :: Seq D } - deriving (Monoid, Show) - -instance IsString Doc where - fromString = text - -isBlank :: D -> Bool -isBlank BreakingSpace = True -isBlank CarriageReturn = True -isBlank NewLine = True -isBlank (BlankLines _) = True -isBlank (Text _ (c:_)) = isSpace c -isBlank _ = False - --- | True if the document is empty. -isEmpty :: Doc -> Bool -isEmpty = Seq.null . unDoc - --- | The empty document. -empty :: Doc -empty = mempty - --- | Concatenate a list of 'Doc's. -cat :: [Doc] -> Doc -cat = mconcat - --- | Same as 'cat'. -hcat :: [Doc] -> Doc -hcat = mconcat - --- | Concatenate a list of 'Doc's, putting breakable spaces --- between them. -infixr 6 <+> -(<+>) :: Doc -> Doc -> Doc -(<+>) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> space <> y - --- | Same as 'cat', but putting breakable spaces between the --- 'Doc's. -hsep :: [Doc] -> Doc -hsep = foldr (<+>) empty - -infixr 5 $$ --- | @a $$ b@ puts @a@ above @b@. -($$) :: Doc -> Doc -> Doc -($$) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> cr <> y - -infixr 5 $+$ --- | @a $+$ b@ puts @a@ above @b@, with a blank line between. -($+$) :: Doc -> Doc -> Doc -($+$) x y = if isEmpty x - then y - else if isEmpty y - then x - else x <> blankline <> y - --- | List version of '$$'. -vcat :: [Doc] -> Doc -vcat = foldr ($$) empty - --- | List version of '$+$'. -vsep :: [Doc] -> Doc -vsep = foldr ($+$) empty - --- | Removes leading blank lines from a 'Doc'. -nestle :: Doc -> Doc -nestle (Doc d) = Doc $ go d - where go x = case viewl x of - (BlankLines _ :< rest) -> go rest - (NewLine :< rest) -> go rest - _ -> x - --- | Chomps trailing blank space off of a 'Doc'. -chomp :: Doc -> Doc -chomp d = Doc (fromList dl') - where dl = toList (unDoc d) - dl' = reverse $ go $ reverse dl - go [] = [] - go (BreakingSpace : xs) = go xs - go (CarriageReturn : xs) = go xs - go (NewLine : xs) = go xs - go (BlankLines _ : xs) = go xs - go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs - go xs = xs - -outp :: (IsString a) => Int -> String -> DocState a -outp off s | off < 0 = do -- offset < 0 means newline characters - st' <- get - let rawpref = prefix st' - when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do - let pref = reverse $ dropWhile isSpace $ reverse rawpref - modify $ \st -> st{ output = fromString pref : output st - , column = column st + realLength pref } - let numnewlines = length $ takeWhile (=='\n') $ reverse s - modify $ \st -> st { output = fromString s : output st - , column = 0 - , newlines = newlines st + numnewlines } -outp off s = do -- offset >= 0 (0 might be combining char) - st' <- get - let pref = prefix st' - when (column st' == 0 && usePrefix st' && not (null pref)) $ do - modify $ \st -> st{ output = fromString pref : output st - , column = column st + realLength pref } - modify $ \st -> st{ output = fromString s : output st - , column = column st + off - , newlines = 0 } - --- | Renders a 'Doc'. @render (Just n)@ will use --- a line length of @n@ to reflow text on breakable spaces. --- @render Nothing@ will not reflow text. -render :: (IsString a) => Maybe Int -> Doc -> a -render linelen doc = fromString . mconcat . reverse . output $ - execState (renderDoc doc) startingState - where startingState = RenderState{ - output = mempty - , prefix = "" - , usePrefix = True - , lineLength = linelen - , column = 0 - , newlines = 2 } - -renderDoc :: (IsString a, Monoid a) - => Doc -> DocState a -renderDoc = renderList . toList . unDoc - -data IsBlock = IsBlock Int [String] - --- This would be nicer with a pattern synonym --- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) - -renderList :: (IsString a, Monoid a) - => [D] -> DocState a -renderList [] = return () -renderList (Text off s : xs) = do - outp off s - renderList xs - -renderList (Prefixed pref d : xs) = do - st <- get - let oldPref = prefix st - put st{ prefix = prefix st ++ pref } - renderDoc d - modify $ \s -> s{ prefix = oldPref } - renderList xs - -renderList (Flush d : xs) = do - st <- get - let oldUsePrefix = usePrefix st - put st{ usePrefix = False } - renderDoc d - modify $ \s -> s{ usePrefix = oldUsePrefix } - renderList xs - -renderList (BeforeNonBlank d : xs) = - case xs of - (x:_) | isBlank x -> renderList xs - | otherwise -> renderDoc d >> renderList xs - [] -> renderList xs - -renderList [BlankLines _] = return () - -renderList (BlankLines m : BlankLines n : xs) = - renderList (BlankLines (max m n) : xs) - -renderList (BlankLines num : xs) = do - st <- get - case output st of - _ | newlines st > num -> return () - | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n") - renderList xs - -renderList (CarriageReturn : BlankLines m : xs) = - renderList (BlankLines m : xs) - -renderList (CarriageReturn : xs) = do - st <- get - if newlines st > 0 || null xs - then renderList xs - else do - outp (-1) "\n" - renderList xs - -renderList (NewLine : xs) = do - outp (-1) "\n" - renderList xs - -renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs) -renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs) -renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs) -renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs) -renderList (BreakingSpace : xs) = do - let isText (Text _ _) = True - isText (Block _ _) = True - isText (AfterBreak _) = True - isText _ = False - let isBreakingSpace BreakingSpace = True - isBreakingSpace _ = False - let xs' = dropWhile isBreakingSpace xs - let next = takeWhile isText xs' - st <- get - let off = sum $ map offsetOf next - case lineLength st of - Just l | column st + 1 + off > l -> do - outp (-1) "\n" - renderList xs' - _ -> do - outp 1 " " - renderList xs' - -renderList (AfterBreak s : xs) = do - st <- get - if newlines st > 0 - then outp (realLength s) s - else return () - renderList xs - -renderList (Block i1 s1 : Block i2 s2 : xs) = - renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) - -renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = - renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) - -renderList (Block _width lns : xs) = do - st <- get - let oldPref = prefix st - case column st - realLength oldPref of - n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' } - _ -> return () - renderList $ intersperse CarriageReturn (map (Text 0) lns) - modify $ \s -> s{ prefix = oldPref } - renderList xs - -mergeBlocks :: Bool -> IsBlock -> IsBlock -> D -mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = - Block (w1 + w2 + if addSpace then 1 else 0) $ - zipWith (\l1 l2 -> pad w1 l1 ++ l2) lns1' (map sp lns2') - where (lns1', lns2') = case (length lns1, length lns2) of - (x, y) | x > y -> (lns1, - lns2 ++ replicate (x - y) "") - | x < y -> (lns1 ++ replicate (y - x) "", - lns2) - | otherwise -> (lns1, lns2) - pad n s = s ++ replicate (n - realLength s) ' ' - sp "" = "" - sp xs = if addSpace then (' ' : xs) else xs - -offsetOf :: D -> Int -offsetOf (Text o _) = o -offsetOf (Block w _) = w -offsetOf BreakingSpace = 1 -offsetOf _ = 0 - --- | A literal string. -text :: String -> Doc -text = Doc . toChunks - where toChunks :: String -> Seq D - toChunks [] = mempty - toChunks s = case break (=='\n') s of - ([], _:ys) -> NewLine <| toChunks ys - (xs, _:ys) -> Text (realLength xs) xs <| - (NewLine <| toChunks ys) - (xs, []) -> singleton $ Text (realLength xs) xs - --- | A character. -char :: Char -> Doc -char c = text [c] - --- | A breaking (reflowable) space. -space :: Doc -space = Doc $ singleton BreakingSpace - --- | A carriage return. Does nothing if we're at the beginning of --- a line; otherwise inserts a newline. -cr :: Doc -cr = Doc $ singleton CarriageReturn - --- | Inserts a blank line unless one exists already. --- (@blankline <> blankline@ has the same effect as @blankline@. -blankline :: Doc -blankline = Doc $ singleton (BlankLines 1) - --- | Inserts a blank lines unless they exists already. --- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@. -blanklines :: Int -> Doc -blanklines n = Doc $ singleton (BlankLines n) - --- | Uses the specified string as a prefix for every line of --- the inside document (except the first, if not at the beginning --- of the line). -prefixed :: String -> Doc -> Doc -prefixed pref doc = Doc $ singleton $ Prefixed pref doc - --- | Makes a 'Doc' flush against the left margin. -flush :: Doc -> Doc -flush doc = Doc $ singleton $ Flush doc - --- | Indents a 'Doc' by the specified number of spaces. -nest :: Int -> Doc -> Doc -nest ind = prefixed (replicate ind ' ') - --- | A hanging indent. @hang ind start doc@ prints @start@, --- then @doc@, leaving an indent of @ind@ spaces on every --- line but the first. -hang :: Int -> Doc -> Doc -> Doc -hang ind start doc = start <> nest ind doc - --- | @beforeNonBlank d@ conditionally includes @d@ unless it is --- followed by blank space. -beforeNonBlank :: Doc -> Doc -beforeNonBlank d = Doc $ singleton (BeforeNonBlank d) - --- | Makes a 'Doc' non-reflowable. -nowrap :: Doc -> Doc -nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc - where replaceSpace _ BreakingSpace = Text 1 " " - replaceSpace _ x = x - --- | Content to print only if it comes at the beginning of a line, --- to be used e.g. for escaping line-initial `.` in groff man. -afterBreak :: String -> Doc -afterBreak s = Doc $ singleton (AfterBreak s) - --- | Returns the width of a 'Doc'. -offset :: Doc -> Int -offset d = case map realLength . lines . render Nothing $ d of - [] -> 0 - os -> maximum os - --- | Returns the minimal width of a 'Doc' when reflowed at breakable spaces. -minOffset :: Doc -> Int -minOffset d = maximum (0: map realLength (lines $ render (Just 0) d)) - --- | @lblock n d@ is a block of width @n@ characters, with --- text derived from @d@ and aligned to the left. -lblock :: Int -> Doc -> Doc -lblock = block id - --- | Like 'lblock' but aligned to the right. -rblock :: Int -> Doc -> Doc -rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w - --- | Like 'lblock' but aligned centered. -cblock :: Int -> Doc -> Doc -cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w - --- | Returns the height of a block or other 'Doc'. -height :: Doc -> Int -height = length . lines . render Nothing - -block :: (String -> String) -> Int -> Doc -> Doc -block filler width d - | width < 1 && not (isEmpty d) = error "Text.Pandoc.Pretty.block: width < 1" - | otherwise = Doc $ singleton $ Block width $ map filler - $ chop width $ render (Just width) d - -chop :: Int -> String -> [String] -chop _ [] = [] -chop n cs = case break (=='\n') cs of - (xs, ys) -> if len <= n - then case ys of - [] -> [xs] - ['\n'] -> [xs] - (_:zs) -> xs : chop n zs - else take n xs : chop n (drop n xs ++ ys) - where len = realLength xs - --- | Encloses a 'Doc' inside a start and end 'Doc'. -inside :: Doc -> Doc -> Doc -> Doc -inside start end contents = - start <> contents <> end - --- | Puts a 'Doc' in curly braces. -braces :: Doc -> Doc -braces = inside (char '{') (char '}') - --- | Puts a 'Doc' in square brackets. -brackets :: Doc -> Doc -brackets = inside (char '[') (char ']') - --- | Puts a 'Doc' in parentheses. -parens :: Doc -> Doc -parens = inside (char '(') (char ')') - --- | Wraps a 'Doc' in single quotes. -quotes :: Doc -> Doc -quotes = inside (char '\'') (char '\'') - --- | Wraps a 'Doc' in double quotes. -doubleQuotes :: Doc -> Doc -doubleQuotes = inside (char '"') (char '"') - --- | Returns width of a character in a monospace font: 0 for a combining --- character, 1 for a regular character, 2 for an East Asian wide character. -charWidth :: Char -> Int -charWidth c = - case c of - _ | c < '\x0300' -> 1 - | c >= '\x0300' && c <= '\x036F' -> 0 -- combining - | c >= '\x0370' && c <= '\x10FC' -> 1 - | c >= '\x1100' && c <= '\x115F' -> 2 - | c >= '\x1160' && c <= '\x11A2' -> 1 - | c >= '\x11A3' && c <= '\x11A7' -> 2 - | c >= '\x11A8' && c <= '\x11F9' -> 1 - | c >= '\x11FA' && c <= '\x11FF' -> 2 - | c >= '\x1200' && c <= '\x2328' -> 1 - | c >= '\x2329' && c <= '\x232A' -> 2 - | c >= '\x232B' && c <= '\x2E31' -> 1 - | c >= '\x2E80' && c <= '\x303E' -> 2 - | c == '\x303F' -> 1 - | c >= '\x3041' && c <= '\x3247' -> 2 - | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous - | c >= '\x3250' && c <= '\x4DBF' -> 2 - | c >= '\x4DC0' && c <= '\x4DFF' -> 1 - | c >= '\x4E00' && c <= '\xA4C6' -> 2 - | c >= '\xA4D0' && c <= '\xA95F' -> 1 - | c >= '\xA960' && c <= '\xA97C' -> 2 - | c >= '\xA980' && c <= '\xABF9' -> 1 - | c >= '\xAC00' && c <= '\xD7FB' -> 2 - | c >= '\xD800' && c <= '\xDFFF' -> 1 - | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous - | c >= '\xF900' && c <= '\xFAFF' -> 2 - | c >= '\xFB00' && c <= '\xFDFD' -> 1 - | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous - | c >= '\xFE10' && c <= '\xFE19' -> 2 - | c >= '\xFE20' && c <= '\xFE26' -> 1 - | c >= '\xFE30' && c <= '\xFE6B' -> 2 - | c >= '\xFE70' && c <= '\xFEFF' -> 1 - | c >= '\xFF01' && c <= '\xFF60' -> 2 - | c >= '\xFF61' && c <= '\x16A38' -> 1 - | c >= '\x1B000' && c <= '\x1B001' -> 2 - | c >= '\x1D000' && c <= '\x1F1FF' -> 1 - | c >= '\x1F200' && c <= '\x1F251' -> 2 - | c >= '\x1F300' && c <= '\x1F773' -> 1 - | c >= '\x20000' && c <= '\x3FFFD' -> 2 - | otherwise -> 1 - --- | Get real length of string, taking into account combining and double-wide --- characters. -realLength :: String -> Int -realLength = foldr (\a b -> charWidth a + b) 0 diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs deleted file mode 100644 index 294a38a1b..000000000 --- a/src/Text/Pandoc/Process.hs +++ /dev/null @@ -1,98 +0,0 @@ -{- -Copyright (C) 2013-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -ByteString variant of 'readProcessWithExitCode'. --} -module Text.Pandoc.Process (pipeProcess) -where -import System.Process -import System.Exit (ExitCode (..)) -import Control.Exception -import System.IO (hClose, hFlush) -import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) -import Control.Monad (unless) -import qualified Data.ByteString.Lazy as BL - -{- | -Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings -instead of strings and allows setting environment variables. - -@readProcessWithExitCode@ creates an external process, reads its -standard output strictly, waits until the process -terminates, and then returns the 'ExitCode' of the process -and the standard output. stderr is inherited from the parent. - -If an asynchronous exception is thrown to the thread executing -@readProcessWithExitCode@, the forked process will be terminated and -@readProcessWithExitCode@ will wait (block) until the process has been -terminated. --} - -pipeProcess - :: Maybe [(String, String)] -- ^ environment variables - -> FilePath -- ^ Filename of the executable (see 'proc' for details) - -> [String] -- ^ any arguments - -> BL.ByteString -- ^ standard input - -> IO (ExitCode,BL.ByteString) -- ^ exitcode, stdout -pipeProcess mbenv cmd args input = - mask $ \restore -> do - (Just inh, Just outh, Nothing, pid) <- createProcess (proc cmd args) - { env = mbenv, - std_in = CreatePipe, - std_out = CreatePipe, - std_err = Inherit } - flip onException - (do hClose inh; hClose outh; - terminateProcess pid; waitForProcess pid) $ restore $ do - -- fork off a thread to start consuming stdout - out <- BL.hGetContents outh - waitOut <- forkWait $ evaluate $ BL.length out - - -- now write and flush any input - let writeInput = do - unless (BL.null input) $ do - BL.hPutStr inh input - hFlush inh - hClose inh - - writeInput - - -- wait on the output - waitOut - - hClose outh - - -- wait on the process - ex <- waitForProcess pid - - return (ex, out) - -forkWait :: IO a -> IO (IO a) -forkWait a = do - res <- newEmptyMVar - _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res - return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return) diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs deleted file mode 100644 index b0bcbd580..000000000 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ /dev/null @@ -1,128 +0,0 @@ -{- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.CommonMark - Copyright : Copyright (C) 2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of CommonMark-formatted plain text to 'Pandoc' document. - -CommonMark is a strongly specified variant of Markdown: http://commonmark.org. --} -module Text.Pandoc.Readers.CommonMark (readCommonMark) -where - -import CMark -import Data.Text (unpack, pack) -import Data.List (groupBy) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Class (PandocMonad) - --- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc -readCommonMark opts s = return $ - nodeToPandoc $ commonmarkToNode opts' $ pack s - where opts' = if extensionEnabled Ext_smart (readerExtensions opts) - then [optNormalize, optSmart] - else [optNormalize] - -nodeToPandoc :: Node -> Pandoc -nodeToPandoc (Node _ DOCUMENT nodes) = - Pandoc nullMeta $ foldr addBlock [] nodes -nodeToPandoc n = -- shouldn't happen - Pandoc nullMeta $ foldr addBlock [] [n] - -addBlocks :: [Node] -> [Block] -addBlocks = foldr addBlock [] - -addBlock :: Node -> [Block] -> [Block] -addBlock (Node _ PARAGRAPH nodes) = - (Para (addInlines nodes) :) -addBlock (Node _ THEMATIC_BREAK _) = - (HorizontalRule :) -addBlock (Node _ BLOCK_QUOTE nodes) = - (BlockQuote (addBlocks nodes) :) -addBlock (Node _ (HTML_BLOCK t) _) = - (RawBlock (Format "html") (unpack t) :) --- Note: the cmark parser will never generate CUSTOM_BLOCK, --- so we don't need to handle it: -addBlock (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = - id -addBlock (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) -addBlock (Node _ (HEADING lev) nodes) = - (Header lev ("",[],[]) (addInlines nodes) :) -addBlock (Node _ (LIST listAttrs) nodes) = - (constructor (map (setTightness . addBlocks . children) nodes) :) - where constructor = case listType listAttrs of - BULLET_LIST -> BulletList - ORDERED_LIST -> OrderedList - (start, DefaultStyle, delim) - start = listStart listAttrs - setTightness = if listTight listAttrs - then map paraToPlain - else id - paraToPlain (Para xs) = Plain (xs) - paraToPlain x = x - delim = case listDelim listAttrs of - PERIOD_DELIM -> Period - PAREN_DELIM -> OneParen -addBlock (Node _ ITEM _) = id -- handled in LIST -addBlock _ = id - -children :: Node -> [Node] -children (Node _ _ ns) = ns - -addInlines :: [Node] -> [Inline] -addInlines = foldr addInline [] - -addInline :: Node -> [Inline] -> [Inline] -addInline (Node _ (TEXT t) _) = (map toinl clumps ++) - where raw = unpack t - clumps = groupBy samekind raw - samekind ' ' ' ' = True - samekind ' ' _ = False - samekind _ ' ' = False - samekind _ _ = True - toinl (' ':_) = Space - toinl xs = Str xs -addInline (Node _ LINEBREAK _) = (LineBreak :) -addInline (Node _ SOFTBREAK _) = (SoftBreak :) -addInline (Node _ (HTML_INLINE t) _) = - (RawInline (Format "html") (unpack t) :) --- Note: the cmark parser will never generate CUSTOM_BLOCK, --- so we don't need to handle it: -addInline (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = - id -addInline (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) -addInline (Node _ EMPH nodes) = - (Emph (addInlines nodes) :) -addInline (Node _ STRONG nodes) = - (Strong (addInlines nodes) :) -addInline (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) -addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs deleted file mode 100644 index bef256a93..000000000 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ /dev/null @@ -1,1055 +0,0 @@ -module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) -import Data.Either (rights) -import Data.Generics -import Data.Char (isSpace) -import Control.Monad.State -import Data.List (intersperse) -import Data.Maybe (fromMaybe) -import Text.TeXMath (readMathML, writeTeX) -import Data.Default -import Data.Foldable (asum) -import Text.Pandoc.Class (PandocMonad) - -{- - -List of all DocBook tags, with [x] indicating implemented, -[o] meaning intentionally left unimplemented (pass through): - -[o] abbrev - An abbreviation, especially one followed by a period -[x] abstract - A summary -[o] accel - A graphical user interface (GUI) keyboard shortcut -[x] ackno - Acknowledgements in an Article -[o] acronym - An often pronounceable word made from the initial -[o] action - A response to a user event -[o] address - A real-world address, generally a postal address -[ ] affiliation - The institutional affiliation of an individual -[ ] alt - Text representation for a graphical element -[o] anchor - A spot in the document -[x] answer - An answer to a question posed in a QandASet -[x] appendix - An appendix in a Book or Article -[x] appendixinfo - Meta-information for an Appendix -[o] application - The name of a software program -[x] area - A region defined for a Callout in a graphic or code example -[x] areaset - A set of related areas in a graphic or code example -[x] areaspec - A collection of regions in a graphic or code example -[ ] arg - An argument in a CmdSynopsis -[x] article - An article -[x] articleinfo - Meta-information for an Article -[ ] artpagenums - The page numbers of an article as published -[x] attribution - The source of a block quote or epigraph -[ ] audiodata - Pointer to external audio data -[ ] audioobject - A wrapper for audio data and its associated meta-information -[x] author - The name of an individual author -[ ] authorblurb - A short description or note about an author -[x] authorgroup - Wrapper for author information when a document has - multiple authors or collabarators -[x] authorinitials - The initials or other short identifier for an author -[o] beginpage - The location of a page break in a print version of the document -[ ] bibliocoverage - The spatial or temporal coverage of a document -[x] bibliodiv - A section of a Bibliography -[x] biblioentry - An entry in a Bibliography -[x] bibliography - A bibliography -[ ] bibliographyinfo - Meta-information for a Bibliography -[ ] biblioid - An identifier for a document -[o] bibliolist - A wrapper for a set of bibliography entries -[ ] bibliomisc - Untyped bibliographic information -[x] bibliomixed - An entry in a Bibliography -[ ] bibliomset - A cooked container for related bibliographic information -[ ] biblioref - A cross reference to a bibliographic entry -[ ] bibliorelation - The relationship of a document to another -[ ] biblioset - A raw container for related bibliographic information -[ ] bibliosource - The source of a document -[ ] blockinfo - Meta-information for a block element -[x] blockquote - A quotation set off from the main text -[x] book - A book -[x] bookinfo - Meta-information for a Book -[x] bridgehead - A free-floating heading -[x] callout - A “called out” description of a marked Area -[x] calloutlist - A list of Callouts -[x] caption - A caption -[x] caution - A note of caution -[x] chapter - A chapter, as of a book -[x] chapterinfo - Meta-information for a Chapter -[ ] citation - An inline bibliographic reference to another published work -[ ] citebiblioid - A citation of a bibliographic identifier -[ ] citerefentry - A citation to a reference page -[ ] citetitle - The title of a cited work -[ ] city - The name of a city in an address -[x] classname - The name of a class, in the object-oriented programming sense -[ ] classsynopsis - The syntax summary for a class definition -[ ] classsynopsisinfo - Information supplementing the contents of - a ClassSynopsis -[ ] cmdsynopsis - A syntax summary for a software command -[ ] co - The location of a callout embedded in text -[x] code - An inline code fragment -[x] col - Specifications for a column in an HTML table -[x] colgroup - A group of columns in an HTML table -[ ] collab - Identifies a collaborator -[ ] collabname - The name of a collaborator -[ ] colophon - Text at the back of a book describing facts about its production -[x] colspec - Specifications for a column in a table -[x] command - The name of an executable program or other software command -[x] computeroutput - Data, generally text, displayed or presented by a computer -[ ] confdates - The dates of a conference for which a document was written -[ ] confgroup - A wrapper for document meta-information about a conference -[ ] confnum - An identifier, frequently numerical, associated with a conference for which a document was written -[ ] confsponsor - The sponsor of a conference for which a document was written -[ ] conftitle - The title of a conference for which a document was written -[x] constant - A programming or system constant -[ ] constraint - A constraint in an EBNF production -[ ] constraintdef - The definition of a constraint in an EBNF production -[ ] constructorsynopsis - A syntax summary for a constructor -[ ] contractnum - The contract number of a document -[ ] contractsponsor - The sponsor of a contract -[ ] contrib - A summary of the contributions made to a document by a - credited source -[ ] copyright - Copyright information about a document -[ ] coref - A cross reference to a co -[ ] corpauthor - A corporate author, as opposed to an individual -[ ] corpcredit - A corporation or organization credited in a document -[ ] corpname - The name of a corporation -[ ] country - The name of a country -[ ] database - The name of a database, or part of a database -[x] date - The date of publication or revision of a document -[ ] dedication - A wrapper for the dedication section of a book -[ ] destructorsynopsis - A syntax summary for a destructor -[ ] edition - The name or number of an edition of a document -[ ] editor - The name of the editor of a document -[x] email - An email address -[x] emphasis - Emphasized text -[x] entry - A cell in a table -[ ] entrytbl - A subtable appearing in place of an Entry in a table -[ ] envar - A software environment variable -[x] epigraph - A short inscription at the beginning of a document or component - note: also handle embedded attribution tag -[x] equation - A displayed mathematical equation -[ ] errorcode - An error code -[ ] errorname - An error name -[ ] errortext - An error message. -[ ] errortype - The classification of an error message -[ ] example - A formal example, with a title -[ ] exceptionname - The name of an exception -[ ] fax - A fax number -[ ] fieldsynopsis - The name of a field in a class definition -[x] figure - A formal figure, generally an illustration, with a title -[x] filename - The name of a file -[ ] firstname - The first name of a person -[ ] firstterm - The first occurrence of a term -[x] footnote - A footnote -[ ] footnoteref - A cross reference to a footnote (a footnote mark) -[x] foreignphrase - A word or phrase in a language other than the primary - language of the document -[x] formalpara - A paragraph with a title -[ ] funcdef - A function (subroutine) name and its return type -[ ] funcparams - Parameters for a function referenced through a function - pointer in a synopsis -[ ] funcprototype - The prototype of a function -[ ] funcsynopsis - The syntax summary for a function definition -[ ] funcsynopsisinfo - Information supplementing the FuncDefs of a FuncSynopsis -[x] function - The name of a function or subroutine, as in a - programming language -[x] glossary - A glossary -[x] glossaryinfo - Meta-information for a Glossary -[x] glossdef - A definition in a GlossEntry -[x] glossdiv - A division in a Glossary -[x] glossentry - An entry in a Glossary or GlossList -[x] glosslist - A wrapper for a set of GlossEntrys -[x] glosssee - A cross-reference from one GlossEntry to another -[x] glossseealso - A cross-reference from one GlossEntry to another -[x] glossterm - A glossary term -[ ] graphic - A displayed graphical object (not an inline) - Note: in DocBook v5 `graphic` is discarded -[ ] graphicco - A graphic that contains callout areas - Note: in DocBook v5 `graphicco` is discarded -[ ] group - A group of elements in a CmdSynopsis -[ ] guibutton - The text on a button in a GUI -[ ] guiicon - Graphic and/or text appearing as a icon in a GUI -[ ] guilabel - The text of a label in a GUI -[x] guimenu - The name of a menu in a GUI -[x] guimenuitem - The name of a terminal menu item in a GUI -[x] guisubmenu - The name of a submenu in a GUI -[ ] hardware - A physical part of a computer system -[ ] highlights - A summary of the main points of the discussed component -[ ] holder - The name of the individual or organization that holds a copyright -[o] honorific - The title of a person -[ ] html:form - An HTML form -[x] imagedata - Pointer to external image data (only `fileref` attribute - implemented but not `entityref` which would require parsing of the DTD) -[x] imageobject - A wrapper for image data and its associated meta-information -[ ] imageobjectco - A wrapper for an image object with callouts -[x] important - An admonition set off from the text -[x] index - An index -[x] indexdiv - A division in an index -[x] indexentry - An entry in an index -[x] indexinfo - Meta-information for an Index -[x] indexterm - A wrapper for terms to be indexed -[x] info - A wrapper for information about a component or other block. (DocBook v5) -[x] informalequation - A displayed mathematical equation without a title -[x] informalexample - A displayed example without a title -[ ] informalfigure - A untitled figure -[ ] informaltable - A table without a title -[ ] initializer - The initializer for a FieldSynopsis -[x] inlineequation - A mathematical equation or expression occurring inline -[ ] inlinegraphic - An object containing or pointing to graphical data - that will be rendered inline -[x] inlinemediaobject - An inline media object (video, audio, image, and so on) -[ ] interface - An element of a GUI -[ ] interfacename - The name of an interface -[ ] invpartnumber - An inventory part number -[ ] isbn - The International Standard Book Number of a document -[ ] issn - The International Standard Serial Number of a periodical -[ ] issuenum - The number of an issue of a journal -[x] itemizedlist - A list in which each entry is marked with a bullet or - other dingbat -[ ] itermset - A set of index terms in the meta-information of a document -[ ] jobtitle - The title of an individual in an organization -[x] keycap - The text printed on a key on a keyboard -[ ] keycode - The internal, frequently numeric, identifier for a key - on a keyboard -[x] keycombo - A combination of input actions -[ ] keysym - The symbolic name of a key on a keyboard -[ ] keyword - One of a set of keywords describing the content of a document -[ ] keywordset - A set of keywords describing the content of a document -[ ] label - A label on a Question or Answer -[ ] legalnotice - A statement of legal obligations or requirements -[ ] lhs - The left-hand side of an EBNF production -[ ] lineage - The portion of a person's name indicating a relationship to - ancestors -[ ] lineannotation - A comment on a line in a verbatim listing -[x] link - A hypertext link -[x] listitem - A wrapper for the elements of a list item -[x] literal - Inline text that is some literal value -[x] literallayout - A block of text in which line breaks and white space are - to be reproduced faithfully -[ ] lot - A list of the titles of formal objects (as tables or figures) in - a document -[ ] lotentry - An entry in a list of titles -[ ] manvolnum - A reference volume number -[x] markup - A string of formatting markup in text that is to be - represented literally -[ ] mathphrase - A mathematical phrase, an expression that can be represented - with ordinary text and a small amount of markup -[ ] medialabel - A name that identifies the physical medium on which some - information resides -[x] mediaobject - A displayed media object (video, audio, image, etc.) -[ ] mediaobjectco - A media object that contains callouts -[x] member - An element of a simple list -[x] menuchoice - A selection or series of selections from a menu -[ ] methodname - The name of a method -[ ] methodparam - Parameters to a method -[ ] methodsynopsis - A syntax summary for a method -[x] mml:math - A MathML equation -[ ] modespec - Application-specific information necessary for the - completion of an OLink -[ ] modifier - Modifiers in a synopsis -[ ] mousebutton - The conventional name of a mouse button -[ ] msg - A message in a message set -[ ] msgaud - The audience to which a message in a message set is relevant -[ ] msgentry - A wrapper for an entry in a message set -[ ] msgexplan - Explanatory material relating to a message in a message set -[ ] msginfo - Information about a message in a message set -[ ] msglevel - The level of importance or severity of a message in a message set -[ ] msgmain - The primary component of a message in a message set -[ ] msgorig - The origin of a message in a message set -[ ] msgrel - A related component of a message in a message set -[ ] msgset - A detailed set of messages, usually error messages -[ ] msgsub - A subcomponent of a message in a message set -[ ] msgtext - The actual text of a message component in a message set -[ ] nonterminal - A non-terminal in an EBNF production -[x] note - A message set off from the text -[ ] objectinfo - Meta-information for an object -[ ] olink - A link that addresses its target indirectly, through an entity -[ ] ooclass - A class in an object-oriented programming language -[ ] ooexception - An exception in an object-oriented programming language -[ ] oointerface - An interface in an object-oriented programming language -[x] option - An option for a software command -[x] optional - Optional information -[x] orderedlist - A list in which each entry is marked with a sequentially - incremented label -[ ] orgdiv - A division of an organization -[ ] orgname - The name of an organization other than a corporation -[ ] otheraddr - Uncategorized information in address -[ ] othercredit - A person or entity, other than an author or editor, - credited in a document -[ ] othername - A component of a persons name that is not a first name, - surname, or lineage -[ ] package - A package -[ ] pagenums - The numbers of the pages in a book, for use in a bibliographic - entry -[x] para - A paragraph -[ ] paramdef - Information about a function parameter in a programming language -[x] parameter - A value or a symbolic reference to a value -[ ] part - A division in a book -[ ] partinfo - Meta-information for a Part -[ ] partintro - An introduction to the contents of a part -[ ] personblurb - A short description or note about a person -[ ] personname - The personal name of an individual -[ ] phone - A telephone number -[ ] phrase - A span of text -[ ] pob - A post office box in an address -[ ] postcode - A postal code in an address -[x] preface - Introductory matter preceding the first chapter of a book -[ ] prefaceinfo - Meta-information for a Preface -[ ] primary - The primary word or phrase under which an index term should be - sorted -[ ] primaryie - A primary term in an index entry, not in the text -[ ] printhistory - The printing history of a document -[ ] procedure - A list of operations to be performed in a well-defined sequence -[ ] production - A production in a set of EBNF productions -[ ] productionrecap - A cross-reference to an EBNF production -[ ] productionset - A set of EBNF productions -[ ] productname - The formal name of a product -[ ] productnumber - A number assigned to a product -[x] programlisting - A literal listing of all or part of a program -[ ] programlistingco - A program listing with associated areas used in callouts -[x] prompt - A character or string indicating the start of an input field in - a computer display -[ ] property - A unit of data associated with some part of a computer system -[ ] pubdate - The date of publication of a document -[ ] publisher - The publisher of a document -[ ] publishername - The name of the publisher of a document -[ ] pubsnumber - A number assigned to a publication other than an ISBN or ISSN - or inventory part number -[x] qandadiv - A titled division in a QandASet -[o] qandaentry - A question/answer set within a QandASet -[o] qandaset - A question-and-answer set -[x] question - A question in a QandASet -[x] quote - An inline quotation -[ ] refclass - The scope or other indication of applicability of a - reference entry -[ ] refdescriptor - A description of the topic of a reference page -[ ] refentry - A reference page (originally a UNIX man-style reference page) -[ ] refentryinfo - Meta-information for a Refentry -[ ] refentrytitle - The title of a reference page -[ ] reference - A collection of reference entries -[ ] referenceinfo - Meta-information for a Reference -[ ] refmeta - Meta-information for a reference entry -[ ] refmiscinfo - Meta-information for a reference entry other than the title - and volume number -[ ] refname - The name of (one of) the subject(s) of a reference page -[ ] refnamediv - The name, purpose, and classification of a reference page -[ ] refpurpose - A short (one sentence) synopsis of the topic of a reference - page -[x] refsect1 - A major subsection of a reference entry -[x] refsect1info - Meta-information for a RefSect1 -[x] refsect2 - A subsection of a RefSect1 -[x] refsect2info - Meta-information for a RefSect2 -[x] refsect3 - A subsection of a RefSect2 -[x] refsect3info - Meta-information for a RefSect3 -[x] refsection - A recursive section in a refentry -[x] refsectioninfo - Meta-information for a refsection -[ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page -[ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv -[x] releaseinfo - Information about a particular release of a document -[ ] remark - A remark (or comment) intended for presentation in a draft - manuscript -[ ] replaceable - Content that may or must be replaced by the user -[ ] returnvalue - The value returned by a function -[ ] revdescription - A extended description of a revision to a document -[ ] revhistory - A history of the revisions to a document -[ ] revision - An entry describing a single revision in the history of the - revisions to a document -[ ] revnumber - A document revision number -[ ] revremark - A description of a revision to a document -[ ] rhs - The right-hand side of an EBNF production -[x] row - A row in a table -[ ] sbr - An explicit line break in a command synopsis -[x] screen - Text that a user sees or might see on a computer screen -[o] screenco - A screen with associated areas used in callouts -[o] screeninfo - Information about how a screen shot was produced -[ ] screenshot - A representation of what the user sees or might see on a - computer screen -[ ] secondary - A secondary word or phrase in an index term -[ ] secondaryie - A secondary term in an index entry, rather than in the text -[x] sect1 - A top-level section of document -[x] sect1info - Meta-information for a Sect1 -[x] sect2 - A subsection within a Sect1 -[x] sect2info - Meta-information for a Sect2 -[x] sect3 - A subsection within a Sect2 -[x] sect3info - Meta-information for a Sect3 -[x] sect4 - A subsection within a Sect3 -[x] sect4info - Meta-information for a Sect4 -[x] sect5 - A subsection within a Sect4 -[x] sect5info - Meta-information for a Sect5 -[x] section - A recursive section -[x] sectioninfo - Meta-information for a recursive section -[x] see - Part of an index term directing the reader instead to another entry - in the index -[x] seealso - Part of an index term directing the reader also to another entry - in the index -[ ] seealsoie - A See also entry in an index, rather than in the text -[ ] seeie - A See entry in an index, rather than in the text -[x] seg - An element of a list item in a segmented list -[x] seglistitem - A list item in a segmented list -[x] segmentedlist - A segmented list, a list of sets of elements -[x] segtitle - The title of an element of a list item in a segmented list -[ ] seriesvolnums - Numbers of the volumes in a series of books -[ ] set - A collection of books -[ ] setindex - An index to a set of books -[ ] setindexinfo - Meta-information for a SetIndex -[ ] setinfo - Meta-information for a Set -[ ] sgmltag - A component of SGML markup -[ ] shortaffil - A brief description of an affiliation -[ ] shortcut - A key combination for an action that is also accessible through - a menu -[ ] sidebar - A portion of a document that is isolated from the main - narrative flow -[ ] sidebarinfo - Meta-information for a Sidebar -[x] simpara - A paragraph that contains only text and inline markup, no block - elements -[x] simplelist - An undecorated list of single words or short phrases -[ ] simplemsgentry - A wrapper for a simpler entry in a message set -[ ] simplesect - A section of a document with no subdivisions -[ ] spanspec - Formatting information for a spanned column in a table -[ ] state - A state or province in an address -[ ] step - A unit of action in a procedure -[ ] stepalternatives - Alternative steps in a procedure -[ ] street - A street address in an address -[ ] structfield - A field in a structure (in the programming language sense) -[ ] structname - The name of a structure (in the programming language sense) -[ ] subject - One of a group of terms describing the subject matter of a - document -[ ] subjectset - A set of terms describing the subject matter of a document -[ ] subjectterm - A term in a group of terms describing the subject matter of - a document -[x] subscript - A subscript (as in H2O, the molecular formula for water) -[ ] substeps - A wrapper for steps that occur within steps in a procedure -[x] subtitle - The subtitle of a document -[x] superscript - A superscript (as in x2, the mathematical notation for x - multiplied by itself) -[ ] surname - A family name; in western cultures the last name -[ ] svg:svg - An SVG graphic -[x] symbol - A name that is replaced by a value before processing -[ ] synopfragment - A portion of a CmdSynopsis broken out from the main body - of the synopsis -[ ] synopfragmentref - A reference to a fragment of a command synopsis -[ ] synopsis - A general-purpose element for representing the syntax of - commands or functions -[ ] systemitem - A system-related item or term -[ ] table - A formal table in a document -[ ] task - A task to be completed -[ ] taskprerequisites - The prerequisites for a task -[ ] taskrelated - Information related to a task -[ ] tasksummary - A summary of a task -[x] tbody - A wrapper for the rows of a table or informal table -[x] td - A table entry in an HTML table -[x] term - The word or phrase being defined or described in a variable list -[ ] termdef - An inline term definition -[ ] tertiary - A tertiary word or phrase in an index term -[ ] tertiaryie - A tertiary term in an index entry, rather than in the text -[ ] textdata - Pointer to external text data -[ ] textobject - A wrapper for a text description of an object and its - associated meta-information -[ ] tfoot - A table footer consisting of one or more rows -[x] tgroup - A wrapper for the main content of a table, or part of a table -[x] th - A table header entry in an HTML table -[x] thead - A table header consisting of one or more rows -[x] tip - A suggestion to the user, set off from the text -[x] title - The text of the title of a section of a document or of a formal - block-level element -[x] titleabbrev - The abbreviation of a Title -[x] toc - A table of contents -[x] tocback - An entry in a table of contents for a back matter component -[x] tocchap - An entry in a table of contents for a component in the body of - a document -[x] tocentry - A component title in a table of contents -[x] tocfront - An entry in a table of contents for a front matter component -[x] toclevel1 - A top-level entry within a table of contents entry for a - chapter-like component -[x] toclevel2 - A second-level entry within a table of contents entry for a - chapter-like component -[x] toclevel3 - A third-level entry within a table of contents entry for a - chapter-like component -[x] toclevel4 - A fourth-level entry within a table of contents entry for a - chapter-like component -[x] toclevel5 - A fifth-level entry within a table of contents entry for a - chapter-like component -[x] tocpart - An entry in a table of contents for a part of a book -[ ] token - A unit of information -[x] tr - A row in an HTML table -[ ] trademark - A trademark -[x] type - The classification of a value -[x] ulink - A link that addresses its target by means of a URL - (Uniform Resource Locator) -[x] uri - A Uniform Resource Identifier -[x] userinput - Data entered by the user -[x] varargs - An empty element in a function synopsis indicating a variable - number of arguments -[x] variablelist - A list in which each entry is composed of a set of one or - more terms and an associated description -[x] varlistentry - A wrapper for a set of terms and the associated description - in a variable list -[x] varname - The name of a variable -[ ] videodata - Pointer to external video data -[ ] videoobject - A wrapper for video data and its associated meta-information -[ ] void - An empty element in a function synopsis indicating that the - function in question takes no arguments -[ ] volumenum - The volume number of a document in a set (as of books in a set - or articles in a journal) -[x] warning - An admonition set off from the text -[x] wordasword - A word meant specifically as a word and not representing - anything else -[x] xref - A cross reference to another part of the document -[ ] year - The year of publication of a document -[x] ?asciidoc-br? - line break from asciidoc docbook output --} - -type DB m = StateT DBState m - -data DBState = DBState{ dbSectionLevel :: Int - , dbQuoteType :: QuoteType - , dbMeta :: Meta - , dbAcceptsMeta :: Bool - , dbBook :: Bool - , dbFigureTitle :: Inlines - , dbContent :: [Content] - } deriving Show - -instance Default DBState where - def = DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - , dbContent = [] } - - -readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc -readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ inp - (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree - return $ Pandoc (dbMeta st') (toList . mconcat $ bs) - --- We treat <?asciidoc-br?> specially (issue #1236), converting it --- to <br/>, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs - -getFigure :: PandocMonad m => Element -> DB m Blocks -getFigure e = do - tit <- case filterChild (named "title") e of - Just t -> getInlines t - Nothing -> return mempty - modify $ \st -> st{ dbFigureTitle = tit } - res <- getBlocks e - modify $ \st -> st{ dbFigureTitle = mempty } - return res - --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) - --- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String -attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" - --- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s - --- - -acceptingMetadata :: PandocMonad m => DB m a -> DB m a -acceptingMetadata p = do - modify (\s -> s { dbAcceptsMeta = True } ) - res <- p - modify (\s -> s { dbAcceptsMeta = False }) - return res - -checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a -checkInMeta p = do - accepts <- dbAcceptsMeta <$> get - when accepts p - return mempty - -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () -addMeta field val = modify (setMeta field val) - -instance HasMeta DBState where - setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)} - deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)} - -isBlockElement :: Content -> Bool -isBlockElement (Elem e) = qName (elName e) `elem` blocktags - where blocktags = ["toc","index","para","formalpara","simpara", - "ackno","epigraph","blockquote","bibliography","bibliodiv", - "biblioentry","glossee","glosseealso","glossary", - "glossdiv","glosslist","chapter","appendix","preface", - "bridgehead","sect1","sect2","sect3","sect4","sect5","section", - "refsect1","refsect2","refsect3","refsection", - "important","caution","note","tip","warning","qandadiv", - "question","answer","abstract","itemizedlist","orderedlist", - "variablelist","article","book","table","informaltable", - "informalexample", "linegroup", - "screen","programlisting","example","calloutlist"] -isBlockElement _ = False - --- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs - --- meld text into beginning of first paragraph of Blocks. --- assumes Blocks start with a Para; if not, does nothing. -addToStart :: Inlines -> Blocks -> Blocks -addToStart toadd bs = - case toList bs of - (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest - _ -> bs - --- function that is used by both mediaobject (in parseBlock) --- and inlinemediaobject (in parseInline) --- A DocBook mediaobject is a wrapper around a set of alternative presentations -getMediaobject :: PandocMonad m => Element -> DB m Inlines -getMediaobject e = do - (imageUrl, attr) <- - case filterChild (named "imageobject") e of - Nothing -> return (mempty, nullAttr) - Just z -> case filterChild (named "imagedata") z of - Nothing -> return (mempty, nullAttr) - Just i -> let atVal a = attrValue a i - w = case atVal "width" of - "" -> [] - d -> [("width", d)] - h = case atVal "depth" of - "" -> [] - d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) - in return (atVal "fileref", atr) - let getCaption el = case filterChild (\x -> named "caption" x - || named "textobject" x - || named "alt" x) el of - Nothing -> return mempty - Just z -> mconcat <$> (mapM parseInline $ elContent z) - figTitle <- gets dbFigureTitle - let (caption, title) = if isNull figTitle - then (getCaption e, "") - else (return figTitle, "fig:") - liftM (imageWith attr imageUrl title) caption - -getBlocks :: PandocMonad m => Element -> DB m Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) - - -parseBlock :: PandocMonad m => Content -> DB m Blocks -parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s - then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x -parseBlock (Elem e) = - case qName (elName e) of - "toc" -> return mempty -- skip TOC, since in pandoc it's autogenerated - "index" -> return mempty -- skip index, since page numbers meaningless - "para" -> parseMixed para (elContent e) - "formalpara" -> do - tit <- case filterChild (named "title") e of - Just t -> (para . strong . (<> str ".")) <$> - getInlines t - Nothing -> return mempty - (tit <>) <$> parseMixed para (elContent e) - "simpara" -> parseMixed para (elContent e) - "ackno" -> parseMixed para (elContent e) - "epigraph" -> parseBlockquote - "blockquote" -> parseBlockquote - "attribution" -> return mempty - "titleabbrev" -> return mempty - "authorinitials" -> return mempty - "title" -> checkInMeta getTitle - "author" -> checkInMeta getAuthor - "authorgroup" -> checkInMeta getAuthorGroup - "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") - "date" -> checkInMeta getDate - "bibliography" -> sect 0 - "bibliodiv" -> sect 1 - "biblioentry" -> parseMixed para (elContent e) - "bibliomixed" -> parseMixed para (elContent e) - "glosssee" -> para . (\ils -> text "See " <> ils <> str ".") - <$> getInlines e - "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".") - <$> getInlines e - "glossary" -> sect 0 - "glossdiv" -> definitionList <$> - mapM parseGlossEntry (filterChildren (named "glossentry") e) - "glosslist" -> definitionList <$> - mapM parseGlossEntry (filterChildren (named "glossentry") e) - "chapter" -> sect 0 - "appendix" -> sect 0 - "preface" -> sect 0 - "bridgehead" -> para . strong <$> getInlines e - "sect1" -> sect 1 - "sect2" -> sect 2 - "sect3" -> sect 3 - "sect4" -> sect 4 - "sect5" -> sect 5 - "section" -> gets dbSectionLevel >>= sect . (+1) - "refsect1" -> sect 1 - "refsect2" -> sect 2 - "refsect3" -> sect 3 - "refsection" -> gets dbSectionLevel >>= sect . (+1) - "important" -> blockQuote . (para (strong $ str "Important") <>) - <$> getBlocks e - "caution" -> blockQuote . (para (strong $ str "Caution") <>) - <$> getBlocks e - "note" -> blockQuote . (para (strong $ str "Note") <>) - <$> getBlocks e - "tip" -> blockQuote . (para (strong $ str "Tip") <>) - <$> getBlocks e - "warning" -> blockQuote . (para (strong $ str "Warning") <>) - <$> getBlocks e - "area" -> return mempty - "areaset" -> return mempty - "areaspec" -> return mempty - "qandadiv" -> gets dbSectionLevel >>= sect . (+1) - "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e - "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e - "abstract" -> blockQuote <$> getBlocks e - "calloutlist" -> bulletList <$> callouts - "itemizedlist" -> bulletList <$> listitems - "orderedlist" -> do - let listStyle = case attrValue "numeration" e of - "arabic" -> Decimal - "loweralpha" -> LowerAlpha - "upperalpha" -> UpperAlpha - "lowerroman" -> LowerRoman - "upperroman" -> UpperRoman - _ -> Decimal - let start = fromMaybe 1 $ - (attrValue "override" <$> filterElement (named "listitem") e) - >>= safeRead - orderedListWith (start,listStyle,DefaultDelim) - <$> listitems - "variablelist" -> definitionList <$> deflistitems - "figure" -> getFigure e - "mediaobject" -> para <$> getMediaobject e - "caption" -> return mempty - "info" -> metaBlock - "articleinfo" -> metaBlock - "sectioninfo" -> return mempty -- keywords & other metadata - "refsectioninfo" -> return mempty -- keywords & other metadata - "refsect1info" -> return mempty -- keywords & other metadata - "refsect2info" -> return mempty -- keywords & other metadata - "refsect3info" -> return mempty -- keywords & other metadata - "sect1info" -> return mempty -- keywords & other metadata - "sect2info" -> return mempty -- keywords & other metadata - "sect3info" -> return mempty -- keywords & other metadata - "sect4info" -> return mempty -- keywords & other metadata - "sect5info" -> return mempty -- keywords & other metadata - "chapterinfo" -> return mempty -- keywords & other metadata - "glossaryinfo" -> return mempty -- keywords & other metadata - "appendixinfo" -> return mempty -- keywords & other metadata - "bookinfo" -> metaBlock - "article" -> modify (\st -> st{ dbBook = False }) >> - getBlocks e - "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e - "table" -> parseTable - "informaltable" -> parseTable - "informalexample" -> divWith ("", ["informalexample"], []) <$> - getBlocks e - "linegroup" -> lineBlock <$> lineItems - "literallayout" -> codeBlockWithLang - "screen" -> codeBlockWithLang - "programlisting" -> codeBlockWithLang - "?xml" -> return mempty - _ -> getBlocks e - where parseMixed container conts = do - let (ils,rest) = break isBlockElement conts - ils' <- (trimInlines . mconcat) <$> mapM parseInline ils - let p = if ils' == mempty then mempty else container ils' - case rest of - [] -> return p - (r:rs) -> do - b <- parseBlock r - x <- parseMixed container rs - return $ p <> b <> x - codeBlockWithLang = do - let classes' = case attrValue "language" e of - "" -> [] - x -> [x] - return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e - parseBlockquote = do - attrib <- case filterChild (named "attribution") e of - Nothing -> return mempty - Just z -> (para . (str "— " <>) . mconcat) - <$> (mapM parseInline $ elContent z) - contents <- getBlocks e - return $ blockQuote (contents <> attrib) - listitems = mapM getBlocks $ filterChildren (named "listitem") e - callouts = mapM getBlocks $ filterChildren (named "callout") e - deflistitems = mapM parseVarListEntry $ filterChildren - (named "varlistentry") e - parseVarListEntry e' = do - let terms = filterChildren (named "term") e' - let items = filterChildren (named "listitem") e' - terms' <- mapM getInlines terms - items' <- mapM getBlocks items - return (mconcat $ intersperse (str "; ") terms', items') - parseGlossEntry e' = do - let terms = filterChildren (named "glossterm") e' - let items = filterChildren (named "glossdef") e' - terms' <- mapM getInlines terms - items' <- mapM getBlocks items - return (mconcat $ intersperse (str "; ") terms', items') - getTitle = do - tit <- getInlines e - subtit <- case filterChild (named "subtitle") e of - Just s -> (text ": " <>) <$> - getInlines s - Nothing -> return mempty - addMeta "title" (tit <> subtit) - - getAuthor = (:[]) <$> getInlines e >>= addMeta "author" - getAuthorGroup = do - let terms = filterChildren (named "author") e - mapM getInlines terms >>= addMeta "author" - getDate = getInlines e >>= addMeta "date" - parseTable = do - let isCaption x = named "title" x || named "caption" x - caption <- case filterChild isCaption e of - Just t -> getInlines t - Nothing -> return mempty - let e' = fromMaybe e $ filterChild (named "tgroup") e - let isColspec x = named "colspec" x || named "col" x - let colspecs = case filterChild (named "colgroup") e' of - Just c -> filterChildren isColspec c - _ -> filterChildren isColspec e' - let isRow x = named "row" x || named "tr" x - headrows <- case filterChild (named "thead") e' of - Just h -> case filterChild isRow h of - Just x -> parseRow x - Nothing -> return [] - Nothing -> return [] - bodyrows <- case filterChild (named "tbody") e' of - Just b -> mapM parseRow - $ filterChildren isRow b - Nothing -> mapM parseRow - $ filterChildren isRow e' - let toAlignment c = case findAttr (unqual "align") c of - Just "left" -> AlignLeft - Just "right" -> AlignRight - Just "center" -> AlignCenter - _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> - (x >= '0' && x <= '9') - || x == '.') w - Nothing -> 0 :: Double - let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs - let aligns = case colspecs of - [] -> replicate numrows AlignDefault - cs -> map toAlignment cs - let widths = case colspecs of - [] -> replicate numrows 0 - cs -> let ws = map toWidth cs - tot = sum ws - in if all (> 0) ws - then map (/ tot) ws - else replicate numrows 0 - let headrows' = if null headrows - then replicate numrows mempty - else headrows - return $ table caption (zip aligns widths) - headrows' bodyrows - isEntry x = named "entry" x || named "td" x || named "th" x - parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry - sect n = do isbook <- gets dbBook - let n' = if isbook || n == 0 then n + 1 else n - headerText <- case filterChild (named "title") e `mplus` - (filterChild (named "info") e >>= - filterChild (named "title")) of - Just t -> getInlines t - Nothing -> return mempty - modify $ \st -> st{ dbSectionLevel = n } - b <- getBlocks e - let ident = attrValue "id" e - modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ headerWith (ident,[],[]) n' headerText <> b - lineItems = mapM getInlines $ filterChildren (named "line") e - metaBlock = acceptingMetadata (getBlocks e) >> return mempty - -getInlines :: PandocMonad m => Element -> DB m Inlines -getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') - -strContentRecursive :: Element -> String -strContentRecursive = strContent . - (\e' -> e'{ elContent = map elementToStr $ elContent e' }) - -elementToStr :: Content -> Content -elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing -elementToStr x = x - -parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s -parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref -parseInline (Elem e) = - case qName (elName e) of - "equation" -> equation displayMath - "informalequation" -> equation displayMath - "inlineequation" -> equation math - "subscript" -> subscript <$> innerInlines - "superscript" -> superscript <$> innerInlines - "inlinemediaobject" -> getMediaobject e - "quote" -> do - qt <- gets dbQuoteType - let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote - modify $ \st -> st{ dbQuoteType = qt' } - contents <- innerInlines - modify $ \st -> st{ dbQuoteType = qt } - return $ if qt == SingleQuote - then singleQuoted contents - else doubleQuoted contents - "simplelist" -> simpleList - "segmentedlist" -> segmentedList - "classname" -> codeWithLang - "code" -> codeWithLang - "filename" -> codeWithLang - "literal" -> codeWithLang - "computeroutput" -> codeWithLang - "prompt" -> codeWithLang - "parameter" -> codeWithLang - "option" -> codeWithLang - "optional" -> do x <- getInlines e - return $ str "[" <> x <> str "]" - "markup" -> codeWithLang - "wordasword" -> emph <$> innerInlines - "command" -> codeWithLang - "varname" -> codeWithLang - "function" -> codeWithLang - "type" -> codeWithLang - "symbol" -> codeWithLang - "constant" -> codeWithLang - "userinput" -> codeWithLang - "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) - "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) - "menuchoice" -> menuchoice <$> (mapM parseInline $ - filter isGuiMenu $ elContent e) - "xref" -> do - content <- dbContent <$> get - let linkend = attrValue "linkend" e - let title = case attrValue "endterm" e of - "" -> maybe "???" xrefTitleByElem - (findElementById linkend content) - endterm -> maybe "???" strContent - (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e - "ulink" -> link (attrValue "url" e) "" <$> innerInlines - "link" -> do - ils <- innerInlines - let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> ('#' : attrValue "linkend" e) - let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) - return $ linkWith attr href "" ils' - "foreignphrase" -> emph <$> innerInlines - "emphasis" -> case attrValue "role" e of - "bold" -> strong <$> innerInlines - "strong" -> strong <$> innerInlines - "strikethrough" -> strikeout <$> innerInlines - _ -> emph <$> innerInlines - "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) - "title" -> return mempty - "affiliation" -> return mempty - -- Note: this isn't a real docbook tag; it's what we convert - -- <?asciidor-br?> to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak - _ -> innerInlines - where innerInlines = (trimInlines . mconcat) <$> - (mapM parseInline $ elContent e) - equation constructor = return $ mconcat $ - map (constructor . writeTeX) - $ rights - $ map (readMathML . showElement . everywhere (mkT removePrefix)) - $ filterChildren (\x -> qName (elName x) == "math" && - qPrefix (elName x) == Just "mml") e - removePrefix elname = elname { qPrefix = Nothing } - codeWithLang = do - let classes' = case attrValue "language" e of - "" -> [] - l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e - simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines - (filterChildren (named "member") e) - segmentedList = do - tit <- maybe (return mempty) getInlines $ filterChild (named "title") e - segtits <- mapM getInlines $ filterChildren (named "segtitle") e - segitems <- mapM (mapM getInlines . filterChildren (named "seg")) - $ filterChildren (named "seglistitem") e - let toSeg = mconcat . zipWith (\x y -> strong (x <> str ":") <> space <> - y <> linebreak) segtits - let segs = mconcat $ map toSeg segitems - let tit' = if tit == mempty - then mempty - else strong tit <> linebreak - return $ linebreak <> tit' <> segs - keycombo = spanWith ("",["keycombo"],[]) . - mconcat . intersperse (str "+") - menuchoice = spanWith ("",["menuchoice"],[]) . - mconcat . intersperse (text " > ") - isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || - named "guimenuitem" x - isGuiMenu _ = False - - findElementById idString content - = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] - - -- Use the 'xreflabel' attribute for getting the title of a xref link; - -- if there's no such attribute, employ some heuristics based on what - -- docbook-xsl does. - xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of - "chapter" -> descendantContent "title" el - "sect1" -> descendantContent "title" el - "sect2" -> descendantContent "title" el - "sect3" -> descendantContent "title" el - "sect4" -> descendantContent "title" el - "sect5" -> descendantContent "title" el - "cmdsynopsis" -> descendantContent "command" el - "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" - where - xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent - . filterElementName (\n -> qName n == name) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs deleted file mode 100644 index 8936a0403..000000000 --- a/src/Text/Pandoc/Readers/Docx.hs +++ /dev/null @@ -1,626 +0,0 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings, CPP #-} - -{- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Docx - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable - -Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse) -to 'Pandoc' document. -} - -{- -Current state of implementation of Docx entities ([x] means -implemented, [-] means partially implemented): - -* Blocks - - - [X] Para - - [X] CodeBlock (styled with `SourceCode`) - - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, - indented) - - [X] OrderedList - - [X] BulletList - - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) - - [X] Header (styled with `Heading#`) - - [ ] HorizontalRule - - [-] Table (column widths and alignments not yet implemented) - -* Inlines - - - [X] Str - - [X] Emph (italics and underline both read as Emph) - - [X] Strong - - [X] Strikeout - - [X] Superscript - - [X] Subscript - - [X] SmallCaps - - [ ] Quoted - - [ ] Cite - - [X] Code (styled with `VerbatimChar`) - - [X] Space - - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [X] Math - - [X] Link (links to an arbitrary bookmark create a span with the target as - id and "anchor" class) - - [X] Image - - [X] Note (Footnotes and Endnotes are silently combined.) --} - -module Text.Pandoc.Readers.Docx - ( readDocxWithWarnings - , readDocx - ) where - -import Codec.Archive.Zip -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Builder -import Text.Pandoc.Walk -import Text.Pandoc.Readers.Docx.Parse -import Text.Pandoc.Readers.Docx.Lists -import Text.Pandoc.Readers.Docx.Combine -import Text.Pandoc.Shared -import Text.Pandoc.MediaBag (MediaBag) -import Data.List (delete, intersect) -import Text.TeXMath (writeTeX) -import Data.Default (Default) -import qualified Data.ByteString.Lazy as B -import qualified Data.Map as M -import qualified Data.Set as Set -import Control.Monad.Reader -import Control.Monad.State -import Data.Sequence (ViewL(..), viewl) -import qualified Data.Sequence as Seq (null) -#if !(MIN_VERSION_base(4,8,0)) -import Data.Traversable (traverse) -#endif -import Text.Pandoc.Error -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad) -import qualified Text.Pandoc.Class as P -import Text.Pandoc.Logging - -readDocx :: PandocMonad m - => ReaderOptions - -> B.ByteString - -> m Pandoc -readDocx opts bytes - | Right archive <- toArchiveOrFail bytes - , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do - mapM_ (P.report . DocxParserWarning) parserWarnings - (meta, blks) <- docxToOutput opts docx - return $ Pandoc meta blks -readDocx _ _ = - throwError $ PandocSomeError "couldn't parse docx file" - --- TODO remove this for 2.0: -readDocxWithWarnings :: PandocMonad m - => ReaderOptions - -> B.ByteString - -> m Pandoc -readDocxWithWarnings = readDocx - -data DState = DState { docxAnchorMap :: M.Map String String - , docxMediaBag :: MediaBag - , docxDropCap :: Inlines - , docxWarnings :: [String] - } - -instance Default DState where - def = DState { docxAnchorMap = M.empty - , docxMediaBag = mempty - , docxDropCap = mempty - , docxWarnings = [] - } - -data DEnv = DEnv { docxOptions :: ReaderOptions - , docxInHeaderBlock :: Bool } - -instance Default DEnv where - def = DEnv def False - -type DocxContext m = ReaderT DEnv (StateT DState m) - -evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a -evalDocxContext ctx env st = flip evalStateT st $ flip runReaderT env $ ctx - --- This is empty, but we put it in for future-proofing. -spansToKeep :: [String] -spansToKeep = [] - -divsToKeep :: [String] -divsToKeep = ["list-item", "Definition", "DefinitionTerm"] - -metaStyles :: M.Map String String -metaStyles = M.fromList [ ("Title", "title") - , ("Subtitle", "subtitle") - , ("Author", "author") - , ("Date", "date") - , ("Abstract", "abstract")] - -sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) -sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp)) - -isMetaPar :: BodyPart -> Bool -isMetaPar (Paragraph pPr _) = - not $ null $ intersect (pStyle pPr) (M.keys metaStyles) -isMetaPar _ = False - -isEmptyPar :: BodyPart -> Bool -isEmptyPar (Paragraph _ parParts) = - all isEmptyParPart parParts - where - isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems - isEmptyParPart _ = False - isEmptyElem (TextRun s) = trim s == "" - isEmptyElem _ = True -isEmptyPar _ = False - -bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) -bodyPartsToMeta' [] = return M.empty -bodyPartsToMeta' (bp : bps) - | (Paragraph pPr parParts) <- bp - , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) - , (Just metaField) <- M.lookup c metaStyles = do - inlines <- smushInlines <$> mapM parPartToInlines parParts - remaining <- bodyPartsToMeta' bps - let - f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] - f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks) - f m (MetaList mv) = MetaList (m : mv) - f m n = MetaList [m, n] - return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining -bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps - -bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta -bodyPartsToMeta bps = do - mp <- bodyPartsToMeta' bps - let mp' = - case M.lookup "author" mp of - Just mv -> M.insert "author" (fixAuthors mv) mp - Nothing -> mp - return $ Meta mp' - -fixAuthors :: MetaValue -> MetaValue -fixAuthors (MetaBlocks blks) = - MetaList $ map g $ filter f blks - where f (Para _) = True - f _ = False - g (Para ils) = MetaInlines ils - g _ = MetaInlines [] -fixAuthors mv = mv - -codeStyles :: [String] -codeStyles = ["VerbatimChar"] - -codeDivs :: [String] -codeDivs = ["SourceCode"] - -runElemToInlines :: RunElem -> Inlines -runElemToInlines (TextRun s) = text s -runElemToInlines (LnBrk) = linebreak -runElemToInlines (Tab) = space -runElemToInlines (SoftHyphen) = text "\xad" -runElemToInlines (NoBreakHyphen) = text "\x2011" - -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString (LnBrk) = ['\n'] -runElemToString (Tab) = ['\t'] -runElemToString (SoftHyphen) = ['\xad'] -runElemToString (NoBreakHyphen) = ['\x2011'] - -runToString :: Run -> String -runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" - -parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run -parPartToString (InternalHyperLink _ runs) = concatMap runToString runs -parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" - -blacklistedCharStyles :: [String] -blacklistedCharStyles = ["Hyperlink"] - -resolveDependentRunStyle :: RunStyle -> RunStyle -resolveDependentRunStyle rPr - | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles = - rPr - | Just (_, cs) <- rStyle rPr = - let rPr' = resolveDependentRunStyle cs - in - RunStyle { isBold = case isBold rPr of - Just bool -> Just bool - Nothing -> isBold rPr' - , isItalic = case isItalic rPr of - Just bool -> Just bool - Nothing -> isItalic rPr' - , isSmallCaps = case isSmallCaps rPr of - Just bool -> Just bool - Nothing -> isSmallCaps rPr' - , isStrike = case isStrike rPr of - Just bool -> Just bool - Nothing -> isStrike rPr' - , rVertAlign = case rVertAlign rPr of - Just valign -> Just valign - Nothing -> rVertAlign rPr' - , rUnderline = case rUnderline rPr of - Just ulstyle -> Just ulstyle - Nothing -> rUnderline rPr' - , rStyle = rStyle rPr } - | otherwise = rPr - -runStyleToTransform :: RunStyle -> (Inlines -> Inlines) -runStyleToTransform rPr - | Just (s, _) <- rStyle rPr - , s `elem` spansToKeep = - let rPr' = rPr{rStyle = Nothing} - in - (spanWith ("", [s], [])) . (runStyleToTransform rPr') - | Just True <- isItalic rPr = - emph . (runStyleToTransform rPr {isItalic = Nothing}) - | Just True <- isBold rPr = - strong . (runStyleToTransform rPr {isBold = Nothing}) - | Just True <- isSmallCaps rPr = - smallcaps . (runStyleToTransform rPr {isSmallCaps = Nothing}) - | Just True <- isStrike rPr = - strikeout . (runStyleToTransform rPr {isStrike = Nothing}) - | Just SupScrpt <- rVertAlign rPr = - superscript . (runStyleToTransform rPr {rVertAlign = Nothing}) - | Just SubScrpt <- rVertAlign rPr = - subscript . (runStyleToTransform rPr {rVertAlign = Nothing}) - | Just "single" <- rUnderline rPr = - emph . (runStyleToTransform rPr {rUnderline = Nothing}) - | otherwise = id - -runToInlines :: PandocMonad m => Run -> DocxContext m Inlines -runToInlines (Run rs runElems) - | Just (s, _) <- rStyle rs - , s `elem` codeStyles = - let rPr = resolveDependentRunStyle rs - codeString = code $ concatMap runElemToString runElems - in - return $ case rVertAlign rPr of - Just SupScrpt -> superscript codeString - Just SubScrpt -> subscript codeString - _ -> codeString - | otherwise = do - let ils = smushInlines (map runElemToInlines runElems) - return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils -runToInlines (Footnote bps) = do - blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) - return $ note blksList -runToInlines (Endnote bps) = do - blksList <- smushBlocks <$> (mapM bodyPartToBlocks bps) - return $ note blksList -runToInlines (InlineDrawing fp title alt bs ext) = do - (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt -runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" - -extentToAttr :: Extent -> Attr -extentToAttr (Just (w, h)) = - ("", [], [("width", showDim w), ("height", showDim h)] ) - where - showDim d = show (d / 914400) ++ "in" -extentToAttr _ = nullAttr - -blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines -blocksToInlinesWarn cmtId blks = do - let blkList = toList blks - notParaOrPlain :: Block -> Bool - notParaOrPlain (Para _) = False - notParaOrPlain (Plain _) = False - notParaOrPlain _ = True - when (not $ null $ filter notParaOrPlain blkList) $ - lift $ P.report $ DocxParserWarning $ - "Docx comment " ++ cmtId ++ " will not retain formatting" - return $ fromList $ blocksToInlines blkList - -parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines -parPartToInlines (PlainRun r) = runToInlines r -parPartToInlines (Insertion _ author date runs) = do - opts <- asks docxOptions - case readerTrackChanges opts of - AcceptChanges -> smushInlines <$> mapM runToInlines runs - RejectChanges -> return mempty - AllChanges -> do - ils <- smushInlines <$> mapM runToInlines runs - let attr = ("", ["insertion"], [("author", author), ("date", date)]) - return $ spanWith attr ils -parPartToInlines (Deletion _ author date runs) = do - opts <- asks docxOptions - case readerTrackChanges opts of - AcceptChanges -> return mempty - RejectChanges -> smushInlines <$> mapM runToInlines runs - AllChanges -> do - ils <- smushInlines <$> mapM runToInlines runs - let attr = ("", ["deletion"], [("author", author), ("date", date)]) - return $ spanWith attr ils -parPartToInlines (CommentStart cmtId author date bodyParts) = do - opts <- asks docxOptions - case readerTrackChanges opts of - AllChanges -> do - blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts - ils <- blocksToInlinesWarn cmtId blks - let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) - return $ spanWith attr ils - _ -> return mempty -parPartToInlines (CommentEnd cmtId) = do - opts <- asks docxOptions - case readerTrackChanges opts of - AllChanges -> do - let attr = ("", ["comment-end"], [("id", cmtId)]) - return $ spanWith attr mempty - _ -> return mempty -parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = - return mempty -parPartToInlines (BookMark _ anchor) = - -- We record these, so we can make sure not to overwrite - -- user-defined anchor links with header auto ids. - do - -- get whether we're in a header. - inHdrBool <- asks docxInHeaderBlock - -- Get the anchor map. - anchorMap <- gets docxAnchorMap - -- We don't want to rewrite if we're in a header, since we'll take - -- care of that later, when we make the header anchor. If the - -- bookmark were already in uniqueIdent form, this would lead to a - -- duplication. Otherwise, we check to see if the id is already in - -- there. Rewrite if necessary. This will have the possible effect - -- of rewriting user-defined anchor links. However, since these - -- are not defined in pandoc, it seems like a necessary evil to - -- avoid an extra pass. - let newAnchor = - if not inHdrBool && anchor `elem` (M.elems anchorMap) - then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) - else anchor - unless inHdrBool - (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) - return $ spanWith (newAnchor, ["anchor"], []) mempty -parPartToInlines (Drawing fp title alt bs ext) = do - (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt -parPartToInlines Chart = do - return $ spanWith ("", ["chart"], []) $ text "[CHART]" -parPartToInlines (InternalHyperLink anchor runs) = do - ils <- smushInlines <$> mapM runToInlines runs - return $ link ('#' : anchor) "" ils -parPartToInlines (ExternalHyperLink target runs) = do - ils <- smushInlines <$> mapM runToInlines runs - return $ link target "" ils -parPartToInlines (PlainOMath exps) = do - return $ math $ writeTeX exps -parPartToInlines (SmartTag runs) = do - ils <- smushInlines <$> mapM runToInlines runs - return ils - -isAnchorSpan :: Inline -> Bool -isAnchorSpan (Span (_, classes, kvs) _) = - classes == ["anchor"] && - null kvs -isAnchorSpan _ = False - -dummyAnchors :: [String] -dummyAnchors = ["_GoBack"] - -makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks -makeHeaderAnchor bs = traverse makeHeaderAnchor' bs - -makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block --- If there is an anchor already there (an anchor span in the header, --- to be exact), we rename and associate the new id with the old one. -makeHeaderAnchor' (Header n (ident, classes, kvs) ils) - | (c:_) <- filter isAnchorSpan ils - , (Span (anchIdent, ["anchor"], _) cIls) <- c = do - hdrIDMap <- gets docxAnchorMap - let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) - else ident - newIls = concatMap f ils where f il | il == c = cIls - | otherwise = [il] - modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap} - makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls --- Otherwise we just give it a name, and register that name (associate --- it with itself.) -makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = - do - hdrIDMap <- gets docxAnchorMap - let newIdent = if null ident - then uniqueIdent ils (Set.fromList $ M.elems hdrIDMap) - else ident - modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} - return $ Header n (newIdent, classes, kvs) ils -makeHeaderAnchor' blk = return blk - --- Rewrite a standalone paragraph block as a plain -singleParaToPlain :: Blocks -> Blocks -singleParaToPlain blks - | (Para (ils) :< seeq) <- viewl $ unMany blks - , Seq.null seeq = - singleton $ Plain ils -singleParaToPlain blks = blks - -cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks -cellToBlocks (Cell bps) = do - blks <- smushBlocks <$> mapM bodyPartToBlocks bps - return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks - -rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks] -rowToBlocksList (Row cells) = do - blksList <- mapM cellToBlocks cells - return $ map singleParaToPlain blksList - -trimLineBreaks :: [Inline] -> [Inline] -trimLineBreaks [] = [] -trimLineBreaks (LineBreak : ils) = trimLineBreaks ils -trimLineBreaks ils - | (LineBreak : ils') <- reverse ils = trimLineBreaks (reverse ils') -trimLineBreaks ils = ils - -parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks) -parStyleToTransform pPr - | (c:cs) <- pStyle pPr - , c `elem` divsToKeep = - let pPr' = pPr { pStyle = cs } - in - (divWith ("", [c], [])) . (parStyleToTransform pPr') - | (c:cs) <- pStyle pPr, - c `elem` listParagraphDivs = - let pPr' = pPr { pStyle = cs, indentation = Nothing} - in - (divWith ("", [c], [])) . (parStyleToTransform pPr') - | (_:cs) <- pStyle pPr - , Just True <- pBlockQuote pPr = - let pPr' = pPr { pStyle = cs } - in - blockQuote . (parStyleToTransform pPr') - | (_:cs) <- pStyle pPr = - let pPr' = pPr { pStyle = cs} - in - parStyleToTransform pPr' - | null (pStyle pPr) - , Just left <- indentation pPr >>= leftParIndent - , Just hang <- indentation pPr >>= hangingParIndent = - let pPr' = pPr { indentation = Nothing } - in - case (left - hang) > 0 of - True -> blockQuote . (parStyleToTransform pPr') - False -> parStyleToTransform pPr' - | null (pStyle pPr), - Just left <- indentation pPr >>= leftParIndent = - let pPr' = pPr { indentation = Nothing } - in - case left > 0 of - True -> blockQuote . (parStyleToTransform pPr') - False -> parStyleToTransform pPr' -parStyleToTransform _ = id - -bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks -bodyPartToBlocks (Paragraph pPr parparts) - | not $ null $ codeDivs `intersect` (pStyle pPr) = - return - $ parStyleToTransform pPr - $ codeBlock - $ concatMap parPartToString parparts - | Just (style, n) <- pHeading pPr = do - ils <- local (\s-> s{docxInHeaderBlock=True}) $ - (smushInlines <$> mapM parPartToInlines parparts) - makeHeaderAnchor $ - headerWith ("", delete style (pStyle pPr), []) n ils - | otherwise = do - ils <- smushInlines <$> mapM parPartToInlines parparts >>= - (return . fromList . trimLineBreaks . normalizeSpaces . toList) - dropIls <- gets docxDropCap - let ils' = dropIls <> ils - if dropCap pPr - then do modify $ \s -> s { docxDropCap = ils' } - return mempty - else do modify $ \s -> s { docxDropCap = mempty } - return $ case isNull ils' of - True -> mempty - _ -> parStyleToTransform pPr $ para ils' -bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do - let - kvs = case levelInfo of - (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] - blks <- bodyPartToBlocks (Paragraph pPr parparts) - return $ divWith ("", ["list-item"], kvs) blks -bodyPartToBlocks (ListItem pPr _ _ _ parparts) = - let pPr' = pPr {pStyle = "ListParagraph": (pStyle pPr)} - in - bodyPartToBlocks $ Paragraph pPr' parparts -bodyPartToBlocks (Tbl _ _ _ []) = - return $ para mempty -bodyPartToBlocks (Tbl cap _ look (r:rs)) = do - let caption = text cap - (hdr, rows) = case firstRowFormatting look of - True | null rs -> (Nothing, [r]) - | otherwise -> (Just r, rs) - False -> (Nothing, r:rs) - - cells <- mapM rowToBlocksList rows - - let width = case cells of - r':_ -> length r' - -- shouldn't happen - [] -> 0 - - hdrCells <- case hdr of - Just r' -> rowToBlocksList r' - Nothing -> return $ replicate width mempty - - -- The two following variables (horizontal column alignment and - -- relative column widths) go to the default at the - -- moment. Width information is in the TblGrid field of the Tbl, - -- so should be possible. Alignment might be more difficult, - -- since there doesn't seem to be a column entity in docx. - let alignments = replicate width AlignDefault - widths = replicate width 0 :: [Double] - - return $ table caption (zip alignments widths) hdrCells cells -bodyPartToBlocks (OMathPara e) = do - return $ para $ displayMath (writeTeX e) - - --- replace targets with generated anchors. -rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline -rewriteLink' l@(Link attr ils ('#':target, title)) = do - anchorMap <- gets docxAnchorMap - return $ case M.lookup target anchorMap of - Just newTarget -> (Link attr ils ('#':newTarget, title)) - Nothing -> l -rewriteLink' il = return il - -rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block] -rewriteLinks = mapM (walkM rewriteLink') - -bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block]) -bodyToOutput (Body bps) = do - let (metabps, blkbps) = sepBodyParts bps - meta <- bodyPartsToMeta metabps - blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps - blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks - return $ (meta, blks') - -docxToOutput :: PandocMonad m - => ReaderOptions - -> Docx - -> m (Meta, [Block]) -docxToOutput opts (Docx (Document _ body)) = - let dEnv = def { docxOptions = opts} in - evalDocxContext (bodyToOutput body) dEnv def diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs deleted file mode 100644 index 39e0df825..000000000 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ /dev/null @@ -1,154 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - PatternGuards #-} - -module Text.Pandoc.Readers.Docx.Combine ( smushInlines - , smushBlocks - ) - where - -import Text.Pandoc.Builder -import Data.List -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr, (><), (|>)) -import qualified Data.Sequence as Seq (null) - -data Modifier a = Modifier (a -> a) - | AttrModifier (Attr -> a -> a) Attr - | NullModifier - -spaceOutInlinesL :: Inlines -> (Inlines, Inlines) -spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m - -spaceOutInlinesR :: Inlines -> (Inlines, Inlines) -spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m - -spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) -spaceOutInlines ils = - let (fs, ils') = unstackInlines ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, (stackInlines fs $ trimInlines . Many $ contents), right) - -stackInlines :: [Modifier Inlines] -> Inlines -> Inlines -stackInlines [] ms = ms -stackInlines (NullModifier : fs) ms = stackInlines fs ms -stackInlines ((Modifier f) : fs) ms = - if isEmpty ms - then stackInlines fs ms - else f $ stackInlines fs ms -stackInlines ((AttrModifier f attr) : fs) ms = f attr $ stackInlines fs ms - -unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) -unstackInlines ms = case ilModifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = ilModifier ms - (fs, ms') = unstackInlines $ ilInnards ms - -ilModifier :: Inlines -> Modifier Inlines -ilModifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - -ilInnards :: Inlines -> Inlines -ilInnards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Link _ lst _) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils - -inlinesL :: Inlines -> (Inlines, Inlines) -inlinesL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) - _ -> (mempty, ils) - -inlinesR :: Inlines -> (Inlines, Inlines) -inlinesR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) - _ -> (ils, mempty) - -combineInlines :: Inlines -> Inlines -> Inlines -combineInlines x y = - let (xs', x') = inlinesR x - (y', ys') = inlinesL y - in - xs' <> (combineSingletonInlines x' y') <> ys' - -combineSingletonInlines :: Inlines -> Inlines -> Inlines -combineSingletonInlines x y = - let (xfs, xs) = unstackInlines x - (yfs, ys) = unstackInlines y - shared = xfs `intersect` yfs - x_remaining = xfs \\ shared - y_remaining = yfs \\ shared - x_rem_attr = filter isAttrModifier x_remaining - y_rem_attr = filter isAttrModifier y_remaining - in - case null shared of - True | isEmpty xs && isEmpty ys -> - stackInlines (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> - let (sp, y') = spaceOutInlinesL y in - (stackInlines x_rem_attr mempty) <> sp <> y' - | isEmpty ys -> - let (x', sp) = spaceOutInlinesR x in - x' <> sp <> (stackInlines y_rem_attr mempty) - | otherwise -> - let (x', xsp) = spaceOutInlinesR x - (ysp, y') = spaceOutInlinesL y - in - x' <> xsp <> ysp <> y' - False -> stackInlines shared $ - combineInlines - (stackInlines x_remaining xs) - (stackInlines y_remaining ys) - -combineBlocks :: Blocks -> Blocks -> Blocks -combineBlocks bs cs - | bs' :> (BlockQuote bs'') <- viewr (unMany bs) - , (BlockQuote cs'') :< cs' <- viewl (unMany cs) = - Many $ (bs' |> (BlockQuote (bs'' <> cs''))) >< cs' -combineBlocks bs cs = bs <> cs - -instance (Monoid a, Eq a) => Eq (Modifier a) where - (Modifier f) == (Modifier g) = (f mempty == g mempty) - (AttrModifier f attr) == (AttrModifier g attr') = (f attr mempty == g attr' mempty) - (NullModifier) == (NullModifier) = True - _ == _ = False - -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - -isAttrModifier :: Modifier a -> Bool -isAttrModifier (AttrModifier _ _) = True -isAttrModifier _ = False - -smushInlines :: [Inlines] -> Inlines -smushInlines xs = foldl combineInlines mempty xs - -smushBlocks :: [Blocks] -> Blocks -smushBlocks xs = foldl combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs deleted file mode 100644 index 395a53907..000000000 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ /dev/null @@ -1,229 +0,0 @@ -{- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Docx.Lists - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable - -Functions for converting flat docx paragraphs into nested lists. --} - -module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets - , blocksToDefinitions - , listParagraphDivs - ) where - -import Text.Pandoc.JSON -import Text.Pandoc.Generic (bottomUp) -import Text.Pandoc.Shared (trim) -import Control.Monad -import Data.List -import Data.Maybe - -isListItem :: Block -> Bool -isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True -isListItem _ = False - -getLevel :: Block -> Maybe Integer -getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs -getLevel _ = Nothing - -getLevelN :: Block -> Integer -getLevelN b = case getLevel b of - Just n -> n - Nothing -> -1 - -getNumId :: Block -> Maybe Integer -getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs -getNumId _ = Nothing - -getNumIdN :: Block -> Integer -getNumIdN b = case getNumId b of - Just n -> n - Nothing -> -1 - -getText :: Block -> Maybe String -getText (Div (_, _, kvs) _) = lookup "text" kvs -getText _ = Nothing - -data ListType = Itemized | Enumerated ListAttributes - -listStyleMap :: [(String, ListNumberStyle)] -listStyleMap = [("upperLetter", UpperAlpha), - ("lowerLetter", LowerAlpha), - ("upperRoman", UpperRoman), - ("lowerRoman", LowerRoman), - ("decimal", Decimal)] - -listDelimMap :: [(String, ListNumberDelim)] -listDelimMap = [("%1)", OneParen), - ("(%1)", TwoParens), - ("%1.", Period)] - -getListType :: Block -> Maybe ListType -getListType b@(Div (_, _, kvs) _) | isListItem b = - let - start = lookup "start" kvs - frmt = lookup "format" kvs - txt = lookup "text" kvs - in - case frmt of - Just "bullet" -> Just Itemized - Just f -> - case txt of - Just t -> Just $ Enumerated ( - read (fromMaybe "1" start) :: Int, - fromMaybe DefaultStyle (lookup f listStyleMap), - fromMaybe DefaultDelim (lookup t listDelimMap)) - Nothing -> Nothing - _ -> Nothing -getListType _ = Nothing - -listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] - --- This is a first stab at going through and attaching meaning to list --- paragraphs, without an item marker, following a list item. We --- assume that these are paragraphs in the same item. - -handleListParagraphs :: [Block] -> [Block] -handleListParagraphs [] = [] -handleListParagraphs ( - (Div attr1@(_, classes1, _) blks1) : - (Div (ident2, classes2, kvs2) blks2) : - blks - ) | "list-item" `elem` classes1 && - not ("list-item" `elem` classes2) && - (not . null) (listParagraphDivs `intersect` classes2) = - -- We don't want to keep this indent. - let newDiv2 = - (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) - in - handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) -handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) - -separateBlocks' :: Block -> [[Block]] -> [[Block]] -separateBlocks' blk ([] : []) = [[blk]] -separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] --- The following is for the invisible bullet lists. This is how --- pandoc-generated ooxml does multiparagraph item lists. -separateBlocks' b acc | liftM trim (getText b) == Just "" = - (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b acc = acc ++ [[b]] - -separateBlocks :: [Block] -> [[Block]] -separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) - -flatToBullets' :: Integer -> [Block] -> [Block] -flatToBullets' _ [] = [] -flatToBullets' num xs@(b : elems) - | getLevelN b == num = b : (flatToBullets' num elems) - | otherwise = - let bNumId = getNumIdN b - bLevel = getLevelN b - (children, remaining) = - span - (\b' -> - ((getLevelN b') > bLevel || - ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) - xs - in - case getListType b of - Just (Enumerated attr) -> - (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - _ -> - (BulletList (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - -flatToBullets :: [Block] -> [Block] -flatToBullets elems = flatToBullets' (-1) elems - -singleItemHeaderToHeader :: Block -> Block -singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h -singleItemHeaderToHeader blk = blk - - -blocksToBullets :: [Block] -> [Block] -blocksToBullets blks = - map singleItemHeaderToHeader $ - bottomUp removeListDivs $ - flatToBullets $ (handleListParagraphs blks) - -plainParaInlines :: Block -> [Inline] -plainParaInlines (Plain ils) = ils -plainParaInlines (Para ils) = ils -plainParaInlines _ = [] - -blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] -blocksToDefinitions' [] acc [] = reverse acc -blocksToDefinitions' defAcc acc [] = - reverse $ (DefinitionList (reverse defAcc)) : acc -blocksToDefinitions' defAcc acc - ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - pair = case remainingAttr2 == ("", [], []) of - True -> (concatMap plainParaInlines blks1, [blks2]) - False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) - in - blocksToDefinitions' (pair : defAcc) acc blks -blocksToDefinitions' defAcc acc - ((Div (ident2, classes2, kvs2) blks2) : blks) - | (not . null) defAcc && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - ((defTerm, defItems):defs) = defAcc - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs - in - blocksToDefinitions' defAcc' acc blks -blocksToDefinitions' [] acc (b:blks) = - blocksToDefinitions' [] (b:acc) blks -blocksToDefinitions' defAcc acc (b:blks) = - blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks - -removeListDivs' :: Block -> [Block] -removeListDivs' (Div (ident, classes, kvs) blks) - | "list-item" `elem` classes = - case delete "list-item" classes of - [] -> blks - classes' -> [Div (ident, classes', kvs) $ blks] -removeListDivs' (Div (ident, classes, kvs) blks) - | not $ null $ listParagraphDivs `intersect` classes = - case classes \\ listParagraphDivs of - [] -> blks - classes' -> [Div (ident, classes', kvs) blks] -removeListDivs' blk = [blk] - -removeListDivs :: [Block] -> [Block] -removeListDivs = concatMap removeListDivs' - - - -blocksToDefinitions :: [Block] -> [Block] -blocksToDefinitions = blocksToDefinitions' [] [] diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs deleted file mode 100644 index 221a1d10a..000000000 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ /dev/null @@ -1,1044 +0,0 @@ -{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-} - -{- -Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Docx.Parse - Copyright : Copyright (C) 2014-2016 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> - Stability : alpha - Portability : portable - -Conversion of docx archive into Docx haskell type --} - -module Text.Pandoc.Readers.Docx.Parse ( Docx(..) - , Document(..) - , Body(..) - , BodyPart(..) - , TblLook(..) - , Extent - , ParPart(..) - , Run(..) - , RunElem(..) - , Notes - , Numbering - , Relationship - , Media - , RunStyle(..) - , VertAlign(..) - , ParIndentation(..) - , ParagraphStyle(..) - , Row(..) - , Cell(..) - , archiveToDocx - , archiveToDocxWithWarnings - ) where -import Codec.Archive.Zip -import Text.XML.Light -import Data.Maybe -import Data.List -import System.FilePath -import Data.Bits ((.|.)) -import qualified Data.ByteString.Lazy as B -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad.Reader -import Control.Monad.State -import Control.Applicative ((<|>)) -import qualified Data.Map as M -import Control.Monad.Except -import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive) -import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..)) -import Text.TeXMath (Exp) -import Text.Pandoc.Readers.Docx.Util -import Data.Char (readLitChar, ord, chr, isDigit) - -data ReaderEnv = ReaderEnv { envNotes :: Notes - , envComments :: Comments - , envNumbering :: Numbering - , envRelationships :: [Relationship] - , envMedia :: Media - , envFont :: Maybe Font - , envCharStyles :: CharStyleMap - , envParStyles :: ParStyleMap - , envLocation :: DocumentLocation - } - deriving Show - -data ReaderState = ReaderState { stateWarnings :: [String] } - deriving Show - -data DocxError = DocxError | WrongElem - deriving Show - -type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) - -runD :: D a -> ReaderEnv -> ReaderState -> (Either DocxError a, ReaderState) -runD dx re rs = runState (runReaderT (runExceptT dx) re) rs - -maybeToD :: Maybe a -> D a -maybeToD (Just a) = return a -maybeToD Nothing = throwError DocxError - -eitherToD :: Either a b -> D b -eitherToD (Right b) = return b -eitherToD (Left _) = throwError DocxError - -concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] -concatMapM f xs = liftM concat (mapM f xs) - - --- This is similar to `mapMaybe`: it maps a function returning the D --- monad over a list, and only keeps the non-erroring return values. -mapD :: (a -> D b) -> [a] -> D [b] -mapD f xs = - let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return []) - in - concatMapM handler xs - -data Docx = Docx Document - deriving Show - -data Document = Document NameSpaces Body - deriving Show - -data Body = Body [BodyPart] - deriving Show - -type Media = [(FilePath, B.ByteString)] - -type CharStyle = (String, RunStyle) - -type ParStyle = (String, ParStyleData) - -type CharStyleMap = M.Map String RunStyle - -type ParStyleMap = M.Map String ParStyleData - -data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] - deriving Show - -data Numb = Numb String String -- right now, only a key to an abstract num - deriving Show - -data AbstractNumb = AbstractNumb String [Level] - deriving Show - --- (ilvl, format, string, start) -type Level = (String, String, String, Maybe Integer) - -data DocumentLocation = InDocument | InFootnote | InEndnote - deriving (Eq,Show) - -data Relationship = Relationship DocumentLocation RelId Target - deriving Show - -data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) - deriving Show - -data Comments = Comments NameSpaces (M.Map String Element) - deriving Show - -data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer - , rightParIndent :: Maybe Integer - , hangingParIndent :: Maybe Integer} - deriving Show - -data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indentation :: Maybe ParIndentation - , dropCap :: Bool - , pHeading :: Maybe (String, Int) - , pNumInfo :: Maybe (String, String) - , pBlockQuote :: Maybe Bool - } - deriving Show - -defaultParagraphStyle :: ParagraphStyle -defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indentation = Nothing - , dropCap = False - , pHeading = Nothing - , pNumInfo = Nothing - , pBlockQuote = Nothing - } - - -data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] - | OMathPara [Exp] - deriving Show - -type TblGrid = [Integer] - -data TblLook = TblLook {firstRowFormatting::Bool} - deriving Show - -defaultTblLook :: TblLook -defaultTblLook = TblLook{firstRowFormatting = False} - -data Row = Row [Cell] - deriving Show - -data Cell = Cell [BodyPart] - deriving Show - --- (width, height) in EMUs -type Extent = Maybe (Double, Double) - -data ParPart = PlainRun Run - | Insertion ChangeId Author ChangeDate [Run] - | Deletion ChangeId Author ChangeDate [Run] - | CommentStart CommentId Author CommentDate [BodyPart] - | CommentEnd CommentId - | BookMark BookMarkId Anchor - | InternalHyperLink Anchor [Run] - | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt - | Chart -- placeholder for now - | PlainOMath [Exp] - | SmartTag [Run] - deriving Show - -data Run = Run RunStyle [RunElem] - | Footnote [BodyPart] - | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt - | InlineChart -- placeholder - deriving Show - -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen - deriving Show - -data VertAlign = BaseLn | SupScrpt | SubScrpt - deriving Show - -data RunStyle = RunStyle { isBold :: Maybe Bool - , isItalic :: Maybe Bool - , isSmallCaps :: Maybe Bool - , isStrike :: Maybe Bool - , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String - , rStyle :: Maybe CharStyle} - deriving Show - -data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) - , isBlockQuote :: Maybe Bool - , numInfo :: Maybe (String, String) - , psStyle :: Maybe ParStyle} - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = Nothing - , isItalic = Nothing - , isSmallCaps = Nothing - , isStrike = Nothing - , rVertAlign = Nothing - , rUnderline = Nothing - , rStyle = Nothing} - -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String - -archiveToDocx :: Archive -> Either DocxError Docx -archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive - -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) -archiveToDocxWithWarnings archive = do - let notes = archiveToNotes archive - comments = archiveToComments archive - numbering = archiveToNumbering archive - rels = archiveToRelationships archive - media = filteredFilesFromArchive archive filePathIsMedia - (styles, parstyles) = archiveToStyles archive - rEnv = - ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument - rState = ReaderState { stateWarnings = [] } - (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState - case eitherDoc of - Right doc -> Right (Docx doc, stateWarnings st) - Left e -> Left e - - - -archiveToDocument :: Archive -> D Document -archiveToDocument zf = do - entry <- maybeToD $ findEntryByPath "word/document.xml" zf - docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = elemToNameSpaces docElem - bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - body <- elemToBody namespaces bodyElem - return $ Document namespaces body - -elemToBody :: NameSpaces -> Element -> D Body -elemToBody ns element | isElem ns "w" "body" element = - mapD (elemToBodyPart ns) (elChildren element) >>= - (\bps -> return $ Body bps) -elemToBody _ _ = throwError WrongElem - -archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap) -archiveToStyles zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe CharStyle), - M.fromList $ buildBasedOnList namespaces styElem - (Nothing :: Maybe ParStyle) ) - -isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool -isBasedOnStyle ns element parentStyle - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" - , Just ps <- parentStyle = (basedOnVal == getStyleId ps) - | isElem ns "w" "style" element - , Just styleType <- findAttrByName ns "w" "type" element - , styleType == cStyleType parentStyle - , Nothing <- findChildByName ns "w" "basedOn" element - , Nothing <- parentStyle = True - | otherwise = False - -class ElemToStyle a where - cStyleType :: Maybe a -> String - elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a - getStyleId :: a -> String - -instance ElemToStyle CharStyle where - cStyleType _ = "character" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "character" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToRunStyle ns element parentStyle) - | otherwise = Nothing - getStyleId s = fst s - -instance ElemToStyle ParStyle where - cStyleType _ = "paragraph" - elemToStyle ns element parentStyle - | isElem ns "w" "style" element - , Just "paragraph" <- findAttrByName ns "w" "type" element - , Just styleId <- findAttrByName ns "w" "styleId" element = - Just (styleId, elemToParStyleData ns element parentStyle) - | otherwise = Nothing - getStyleId s = fst s - -getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -getStyleChildren ns element parentStyle - | isElem ns "w" "styles" element = - mapMaybe (\e -> elemToStyle ns e parentStyle) $ - filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element - | otherwise = [] - -buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a] -buildBasedOnList ns element rootStyle = - case (getStyleChildren ns element rootStyle) of - [] -> [] - stys -> stys ++ - (concatMap (\s -> buildBasedOnList ns element (Just s)) stys) - -archiveToNotes :: Archive -> Notes -archiveToNotes zf = - let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - fn_namespaces = case fnElem of - Just e -> elemToNameSpaces e - Nothing -> [] - en_namespaces = case enElem of - Just e -> elemToNameSpaces e - Nothing -> [] - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") - in - Notes ns fn en - -archiveToComments :: Archive -> Comments -archiveToComments zf = - let cmtsElem = findEntryByPath "word/comments.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - cmts_namespaces = case cmtsElem of - Just e -> elemToNameSpaces e - Nothing -> [] - cmts = (elemToComments cmts_namespaces) <$> cmtsElem - in - case cmts of - Just c -> Comments cmts_namespaces c - Nothing -> Comments cmts_namespaces M.empty - -filePathToRelType :: FilePath -> Maybe DocumentLocation -filePathToRelType "word/_rels/document.xml.rels" = Just InDocument -filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote -filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote -filePathToRelType _ = Nothing - -relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship -relElemToRelationship relType element | qName (elName element) == "Relationship" = - do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship relType relId target -relElemToRelationship _ _ = Nothing - -filePathToRelationships :: Archive -> FilePath -> [Relationship] -filePathToRelationships ar fp | Just relType <- filePathToRelType fp - , Just entry <- findEntryByPath fp ar - , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = - mapMaybe (relElemToRelationship relType) $ elChildren relElems -filePathToRelationships _ _ = [] - -archiveToRelationships :: Archive -> [Relationship] -archiveToRelationships archive = - concatMap (filePathToRelationships archive) $ filesInArchive archive - -filePathIsMedia :: FilePath -> Bool -filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") - -lookupLevel :: String -> String -> Numbering -> Maybe Level -lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do - absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs - lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl - - -numElemToNum :: NameSpaces -> Element -> Maybe Numb -numElemToNum ns element - | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element - absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" - return $ Numb numId absNumId -numElemToNum _ _ = Nothing - -absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb -absNumElemToAbsNum ns element - | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element - let levelElems = findChildrenByName ns "w" "lvl" element - levels = mapMaybe (levelElemToLevel ns) levelElems - return $ AbstractNumb absNumId levels -absNumElemToAbsNum _ _ = Nothing - -levelElemToLevel :: NameSpaces -> Element -> Maybe Level -levelElemToLevel ns element - | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element - fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" - txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" - let start = findChildByName ns "w" "start" element - >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) - return (ilvl, fmt, txt, start) -levelElemToLevel _ _ = Nothing - -archiveToNumbering' :: Archive -> Maybe Numbering -archiveToNumbering' zf = do - case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] - Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = elemToNameSpaces numberingElem - numElems = findChildrenByName namespaces "w" "num" numberingElem - absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem - nums = mapMaybe (numElemToNum namespaces) numElems - absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems - return $ Numbering namespaces nums absNums - -archiveToNumbering :: Archive -> Numbering -archiveToNumbering archive = - fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) - -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) -elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = - let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= - (\a -> Just (a, e))) - (findChildrenByName ns "w" notetype element) - in - Just $ M.fromList $ pairs -elemToNotes _ _ _ = Nothing - -elemToComments :: NameSpaces -> Element -> M.Map String Element -elemToComments ns element - | isElem ns "w" "comments" element = - let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= - (\a -> Just (a, e))) - (findChildrenByName ns "w" "comment" element) - in - M.fromList $ pairs -elemToComments _ _ = M.empty - - ---------------------------------------------- ---------------------------------------------- - -elemToTblGrid :: NameSpaces -> Element -> D TblGrid -elemToTblGrid ns element | isElem ns "w" "tblGrid" element = - let cols = findChildrenByName ns "w" "gridCol" element - in - mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger)) - cols -elemToTblGrid _ _ = throwError WrongElem - -elemToTblLook :: NameSpaces -> Element -> D TblLook -elemToTblLook ns element | isElem ns "w" "tblLook" element = - let firstRow = findAttrByName ns "w" "firstRow" element - val = findAttrByName ns "w" "val" element - firstRowFmt = - case firstRow of - Just "1" -> True - Just _ -> False - Nothing -> case val of - Just bitMask -> testBitMask bitMask 0x020 - Nothing -> False - in - return $ TblLook{firstRowFormatting = firstRowFmt} -elemToTblLook _ _ = throwError WrongElem - -elemToRow :: NameSpaces -> Element -> D Row -elemToRow ns element | isElem ns "w" "tr" element = - do - let cellElems = findChildrenByName ns "w" "tc" element - cells <- mapD (elemToCell ns) cellElems - return $ Row cells -elemToRow _ _ = throwError WrongElem - -elemToCell :: NameSpaces -> Element -> D Cell -elemToCell ns element | isElem ns "w" "tc" element = - do - cellContents <- mapD (elemToBodyPart ns) (elChildren element) - return $ Cell cellContents -elemToCell _ _ = throwError WrongElem - -elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation -elemToParIndentation ns element | isElem ns "w" "ind" element = - Just $ ParIndentation { - leftParIndent = - findAttrByName ns "w" "left" element >>= - stringToInteger - , rightParIndent = - findAttrByName ns "w" "right" element >>= - stringToInteger - , hangingParIndent = - findAttrByName ns "w" "hanging" element >>= - stringToInteger} -elemToParIndentation _ _ = Nothing - -testBitMask :: String -> Int -> Bool -testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of - [] -> False - ((n', _) : _) -> ((n' .|. n) /= 0) - -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - -elemToBodyPart :: NameSpaces -> Element -> D BodyPart -elemToBodyPart ns element - | isElem ns "w" "p" element - , (c:_) <- findChildrenByName ns "m" "oMathPara" element = - do - expsLst <- eitherToD $ readOMML $ showElement c - return $ OMathPara expsLst -elemToBodyPart ns element - | isElem ns "w" "p" element - , Just (numId, lvl) <- getNumInfo ns element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty - parparts <- mapD (elemToParPart ns) (elChildren element) - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts -elemToBodyPart ns element - | isElem ns "w" "p" element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty - parparts <- mapD (elemToParPart ns) (elChildren element) - -- Word uses list enumeration for numbered headings, so we only - -- want to infer a list from the styles if it is NOT a heading. - case pHeading parstyle of - Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - num <- asks envNumbering - let levelInfo = lookupLevel numId lvl num - return $ ListItem parstyle numId lvl levelInfo parparts - _ -> return $ Paragraph parstyle parparts -elemToBodyPart ns element - | isElem ns "w" "tbl" element = do - let caption' = findChildByName ns "w" "tblPr" element - >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" - caption = (fromMaybe "" caption') - grid' = case findChildByName ns "w" "tblGrid" element of - Just g -> elemToTblGrid ns g - Nothing -> return [] - tblLook' = case findChildByName ns "w" "tblPr" element >>= - findChildByName ns "w" "tblLook" - of - Just l -> elemToTblLook ns l - Nothing -> return defaultTblLook - - grid <- grid' - tblLook <- tblLook' - rows <- mapD (elemToRow ns) (elChildren element) - return $ Tbl caption grid tblLook rows -elemToBodyPart _ _ = throwError WrongElem - -lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target -lookupRelationship docLocation relid rels = - lookup (docLocation, relid) pairs - where - pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels - -expandDrawingId :: String -> D (FilePath, B.ByteString) -expandDrawingId s = do - location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) - case target of - Just filepath -> do - bytes <- asks (lookup ("word/" ++ filepath) . envMedia) - case bytes of - Just bs -> return (filepath, bs) - Nothing -> throwError DocxError - Nothing -> throwError DocxError - -getTitleAndAlt :: NameSpaces -> Element -> (String, String) -getTitleAndAlt ns element = - let mbDocPr = findChildByName ns "wp" "inline" element >>= - findChildByName ns "wp" "docPr" - title = case mbDocPr >>= findAttrByName ns "" "title" of - Just title' -> title' - Nothing -> "" - alt = case mbDocPr >>= findAttrByName ns "" "descr" of - Just alt' -> alt' - Nothing -> "" - in (title, alt) - -elemToParPart :: NameSpaces -> Element -> D ParPart -elemToParPart ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChildByName ns "w" "drawing" element - , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" - , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem - = let (title, alt) = getTitleAndAlt ns drawingElem - a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" - in - case drawing of - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) - Nothing -> throwError WrongElem --- The below is an attempt to deal with images in deprecated vml format. -elemToParPart ns element - | isElem ns "w" "r" element - , Just _ <- findChildByName ns "w" "pict" element = - let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" - in - case drawing of - -- Todo: check out title and attr for deprecated format. - Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) - Nothing -> throwError WrongElem --- Chart -elemToParPart ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChildByName ns "w" "drawing" element - , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" - , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem - = return Chart -elemToParPart ns element - | isElem ns "w" "r" element = - elemToRun ns element >>= (\r -> return $ PlainRun r) -elemToParPart ns element - | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ Insertion cId cAuthor cDate runs -elemToParPart ns element - | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ Deletion cId cAuthor cDate runs -elemToParPart ns element - | isElem ns "w" "smartTag" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ SmartTag runs -elemToParPart ns element - | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = - return $ BookMark bmId bmName -elemToParPart ns element - | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do - location <- asks envLocation - runs <- mapD (elemToRun ns) (elChildren element) - rels <- asks envRelationships - case lookupRelationship location relId rels of - Just target -> do - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs - Nothing -> return $ ExternalHyperLink target runs - Nothing -> return $ ExternalHyperLink "" runs -elemToParPart ns element - | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ InternalHyperLink anchor runs -elemToParPart ns element - | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do - (Comments _ commentMap) <- asks envComments - case M.lookup cmtId commentMap of - Just cmtElem -> elemToCommentStart ns cmtElem - Nothing -> throwError WrongElem -elemToParPart ns element - | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = - return $ CommentEnd cmtId -elemToParPart ns element - | isElem ns "m" "oMath" element = - (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) -elemToParPart _ _ = throwError WrongElem - -elemToCommentStart :: NameSpaces -> Element -> D ParPart -elemToCommentStart ns element - | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do - bps <- mapD (elemToBodyPart ns) (elChildren element) - return $ CommentStart cmtId cmtAuthor cmtDate bps -elemToCommentStart _ _ = throwError WrongElem - -lookupFootnote :: String -> Notes -> Maybe Element -lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s) - -lookupEndnote :: String -> Notes -> Maybe Element -lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s) - -elemToExtent :: Element -> Extent -elemToExtent drawingElem = - case (getDim "cx", getDim "cy") of - (Just w, Just h) -> Just (w, h) - _ -> Nothing - where - wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" - getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead - - -childElemToRun :: NameSpaces -> Element -> D Run -childElemToRun ns element - | isElem ns "w" "drawing" element - , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture" - , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) element - = let (title, alt) = getTitleAndAlt ns element - a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) - in - case drawing of - Just s -> expandDrawingId s >>= - (\(fp, bs) -> return $ InlineDrawing fp title alt bs $ elemToExtent element) - Nothing -> throwError WrongElem -childElemToRun ns element - | isElem ns "w" "drawing" element - , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" - , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element - = return InlineChart -childElemToRun ns element - | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do - notes <- asks envNotes - case lookupFootnote fnId notes of - Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) - return $ Footnote bps - Nothing -> return $ Footnote [] -childElemToRun ns element - | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do - notes <- asks envNotes - case lookupEndnote enId notes of - Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) - return $ Endnote bps - Nothing -> return $ Endnote [] -childElemToRun _ _ = throwError WrongElem - -elemToRun :: NameSpaces -> Element -> D Run -elemToRun ns element - | isElem ns "w" "r" element - , Just altCont <- findChildByName ns "mc" "AlternateContent" element = - do let choices = findChildrenByName ns "mc" "Choice" altCont - choiceChildren = map head $ filter (not . null) $ map elChildren choices - outputs <- mapD (childElemToRun ns) choiceChildren - case outputs of - r : _ -> return r - [] -> throwError WrongElem -elemToRun ns element - | isElem ns "w" "r" element - , Just drawingElem <- findChildByName ns "w" "drawing" element = - childElemToRun ns drawingElem -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChildByName ns "w" "footnoteReference" element = - childElemToRun ns ref -elemToRun ns element - | isElem ns "w" "r" element - , Just ref <- findChildByName ns "w" "endnoteReference" element = - childElemToRun ns ref -elemToRun ns element - | isElem ns "w" "r" element = do - runElems <- elemToRunElems ns element - runStyle <- elemToRunStyleD ns element - return $ Run runStyle runElems -elemToRun _ _ = throwError WrongElem - -getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a -getParentStyleValue field style - | Just value <- field style = Just value - | Just parentStyle <- psStyle style - = getParentStyleValue field (snd parentStyle) -getParentStyleValue _ _ = Nothing - -getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] -> - Maybe a -getParStyleField field stylemap styles - | x <- mapMaybe (\x -> M.lookup x stylemap) styles - , (y:_) <- mapMaybe (getParentStyleValue field) x - = Just y -getParStyleField _ _ _ = Nothing - -elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle -elemToParagraphStyle ns element sty - | Just pPr <- findChildByName ns "w" "pPr" element = - let style = - mapMaybe - (findAttrByName ns "w" "val") - (findChildrenByName ns "w" "pStyle" pPr) - in ParagraphStyle - {pStyle = style - , indentation = - findChildByName ns "w" "ind" pPr >>= - elemToParIndentation ns - , dropCap = - case - findChildByName ns "w" "framePr" pPr >>= - findAttrByName ns "w" "dropCap" - of - Just "none" -> False - Just _ -> True - Nothing -> False - , pHeading = getParStyleField headingLev sty style - , pNumInfo = getParStyleField numInfo sty style - , pBlockQuote = getParStyleField isBlockQuote sty style - } -elemToParagraphStyle _ _ _ = defaultParagraphStyle - -checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool -checkOnOff ns rPr tag - | Just t <- findChild tag rPr - , Just val <- findAttrByName ns "w" "val" t = - Just $ case val of - "true" -> True - "false" -> False - "on" -> True - "off" -> False - "1" -> True - "0" -> False - _ -> False - | Just _ <- findChild tag rPr = Just True -checkOnOff _ _ _ = Nothing - -elemToRunStyleD :: NameSpaces -> Element -> D RunStyle -elemToRunStyleD ns element - | Just rPr <- findChildByName ns "w" "rPr" element = do - charStyles <- asks envCharStyles - let parentSty = case - findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" - of - Just styName | Just style <- M.lookup styName charStyles -> - Just (styName, style) - _ -> Nothing - return $ elemToRunStyle ns element parentSty -elemToRunStyleD _ _ = return defaultRunStyle - -elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle -elemToRunStyle ns element parentStyle - | Just rPr <- findChildByName ns "w" "rPr" element = - RunStyle - { - isBold = checkOnOff ns rPr (elemName ns "w" "b") - , isItalic = checkOnOff ns rPr (elemName ns "w" "i") - , isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps") - , isStrike = checkOnOff ns rPr (elemName ns "w" "strike") - , rVertAlign = - findChildByName ns "w" "vertAlign" rPr >>= - findAttrByName ns "w" "val" >>= - \v -> Just $ case v of - "superscript" -> SupScrpt - "subscript" -> SubScrpt - _ -> BaseLn - , rUnderline = - findChildByName ns "w" "u" rPr >>= - findAttrByName ns "w" "val" - , rStyle = parentStyle - } -elemToRunStyle _ _ _ = defaultRunStyle - -isNumericNotNull :: String -> Bool -isNumericNotNull str = (str /= []) && (all isDigit str) - -getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int) -getHeaderLevel ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- stripPrefix "Heading" styleId - , isNumericNotNull index = Just (styleId, read index) - | Just styleId <- findAttrByName ns "w" "styleId" element - , Just index <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" >>= - stripPrefix "heading " - , isNumericNotNull index = Just (styleId, read index) -getHeaderLevel _ _ = Nothing - -blockQuoteStyleIds :: [String] -blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"] - -blockQuoteStyleNames :: [String] -blockQuoteStyleNames = ["Quote", "Block Text"] - -getBlockQuote :: NameSpaces -> Element -> Maybe Bool -getBlockQuote ns element - | Just styleId <- findAttrByName ns "w" "styleId" element - , styleId `elem` blockQuoteStyleIds = Just True - | Just styleName <- findChildByName ns "w" "name" element >>= - findAttrByName ns "w" "val" - , styleName `elem` blockQuoteStyleNames = Just True -getBlockQuote _ _ = Nothing - -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) -getNumInfo ns element = do - let numPr = findChildByName ns "w" "pPr" element >>= - findChildByName ns "w" "numPr" - lvl = fromMaybe "0" (numPr >>= - findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") - numId <- numPr >>= - findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" - return (numId, lvl) - - -elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData -elemToParStyleData ns element parentStyle = - ParStyleData - { - headingLev = getHeaderLevel ns element - , isBlockQuote = getBlockQuote ns element - , numInfo = getNumInfo ns element - , psStyle = parentStyle - } - -elemToRunElem :: NameSpaces -> Element -> D RunElem -elemToRunElem ns element - | isElem ns "w" "t" element - || isElem ns "w" "delText" element - || isElem ns "m" "t" element = do - let str = strContent element - font <- asks envFont - case font of - Nothing -> return $ TextRun str - Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str - | isElem ns "w" "br" element = return LnBrk - | isElem ns "w" "tab" element = return Tab - | isElem ns "w" "softHyphen" element = return SoftHyphen - | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen - | isElem ns "w" "sym" element = return (getSymChar ns element) - | otherwise = throwError WrongElem - where - lowerFromPrivate (ord -> c) - | c >= ord '\xF000' = chr $ c - ord '\xF000' - | otherwise = chr c - --- The char attribute is a hex string -getSymChar :: NameSpaces -> Element -> RunElem -getSymChar ns element - | Just s <- lowerFromPrivate <$> getCodepoint - , Just font <- getFont = - let [(char, _)] = readLitChar ("\\x" ++ s) in - TextRun . maybe "" (:[]) $ getUnicode font char - where - getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element - lowerFromPrivate ('F':xs) = '0':xs - lowerFromPrivate xs = xs -getSymChar _ _ = TextRun "" - -elemToRunElems :: NameSpaces -> Element -> D [RunElem] -elemToRunElems ns element - | isElem ns "w" "r" element - || isElem ns "m" "r" element = do - let qualName = elemName ns "w" - let font = do - fontElem <- findElement (qualName "rFonts") element - stringToFont =<< - (foldr (<|>) Nothing $ - map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"]) - local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) -elemToRunElems _ _ = throwError WrongElem - -setFont :: Maybe Font -> ReaderEnv -> ReaderEnv -setFont f s = s{envFont = f} - diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs deleted file mode 100644 index 00906cf07..000000000 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ /dev/null @@ -1,108 +0,0 @@ -module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) - , alterMap - , getMap - , defaultStyleMaps - , getStyleMaps - , getStyleId - , hasStyleName - ) where - -import Text.XML.Light -import Text.Pandoc.Readers.Docx.Util -import Control.Monad.State -import Data.Char (toLower) -import qualified Data.Map as M - -newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) -newtype CharStyleMap = CharStyleMap ( M.Map String String ) - -class StyleMap a where - alterMap :: (M.Map String String -> M.Map String String) -> a -> a - getMap :: a -> M.Map String String - -instance StyleMap ParaStyleMap where - alterMap f (ParaStyleMap m) = ParaStyleMap $ f m - getMap (ParaStyleMap m) = m - -instance StyleMap CharStyleMap where - alterMap f (CharStyleMap m) = CharStyleMap $ f m - getMap (CharStyleMap m) = m - -insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a -insert (Just k) (Just v) m = alterMap (M.insert k v) m -insert _ _ m = m - -getStyleId :: (StyleMap a) => String -> a -> String -getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap - -hasStyleName :: (StyleMap a) => String -> a -> Bool -hasStyleName styleName = M.member (map toLower styleName) . getMap - -data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces - , sParaStyleMap :: ParaStyleMap - , sCharStyleMap :: CharStyleMap - } - -data StyleType = ParaStyle | CharStyle - -defaultStyleMaps :: StyleMaps -defaultStyleMaps = StyleMaps { sNameSpaces = [] - , sParaStyleMap = ParaStyleMap M.empty - , sCharStyleMap = CharStyleMap M.empty - } - -type StateM a = State StyleMaps a - -getStyleMaps :: Element -> StyleMaps -getStyleMaps docElem = execState genStyleMap state' - where - state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} - genStyleItem e = do - styleType <- getStyleType e - styleId <- getAttrStyleId e - nameValLowercase <- fmap (map toLower) `fmap` getNameVal e - case styleType of - Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId - Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId - _ -> return () - genStyleMap = do - style <- elemName' "style" - let styles = findChildren style docElem - forM_ styles genStyleItem - -modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () -modParaStyleMap f = modify $ \s -> - s {sParaStyleMap = f $ sParaStyleMap s} - -modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () -modCharStyleMap f = modify $ \s -> - s {sCharStyleMap = f $ sCharStyleMap s} - -getStyleType :: Element -> StateM (Maybe StyleType) -getStyleType e = do - styleTypeStr <- getAttrType e - case styleTypeStr of - Just "paragraph" -> return $ Just ParaStyle - Just "character" -> return $ Just CharStyle - _ -> return Nothing - -getAttrType :: Element -> StateM (Maybe String) -getAttrType el = do - name <- elemName' "type" - return $ findAttr name el - -getAttrStyleId :: Element -> StateM (Maybe String) -getAttrStyleId el = do - name <- elemName' "styleId" - return $ findAttr name el - -getNameVal :: Element -> StateM (Maybe String) -getNameVal el = do - name <- elemName' "name" - val <- elemName' "val" - return $ findChild name el >>= findAttr val - -elemName' :: String -> StateM QName -elemName' name = do - namespaces <- gets sNameSpaces - return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs deleted file mode 100644 index 6646e5b7f..000000000 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ /dev/null @@ -1,47 +0,0 @@ -module Text.Pandoc.Readers.Docx.Util ( - NameSpaces - , elemName - , isElem - , elemToNameSpaces - , findChildByName - , findChildrenByName - , findAttrByName - ) where - -import Text.XML.Light -import Data.Maybe (mapMaybe) - -type NameSpaces = [(String, String)] - -elemToNameSpaces :: Element -> NameSpaces -elemToNameSpaces = mapMaybe attrToNSPair . elAttribs - -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - let ns' = ns ++ elemToNameSpaces element - in qName (elName element) == name && - qURI (elName element) == lookup prefix ns' - -findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element -findChildByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el - in findChild (elemName ns' pref name) el - -findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element] -findChildrenByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el - in findChildren (elemName ns' pref name) el - -findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String -findAttrByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el - in findAttr (elemName ns' pref name) el - diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs deleted file mode 100644 index 2eaa842b6..000000000 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ /dev/null @@ -1,279 +0,0 @@ -{-# LANGUAGE - ViewPatterns - , StandaloneDeriving - , TupleSections - , FlexibleContexts #-} - -module Text.Pandoc.Readers.EPUB - (readEPUB) - where - -import Text.XML.Light -import Text.Pandoc.Definition hiding (Attr) -import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Walk (walk, query) -import Text.Pandoc.Options ( ReaderOptions(..)) -import Text.Pandoc.Extensions (enableExtension, Extension(Ext_raw_html)) -import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) -import Network.URI (unEscapeString) -import Text.Pandoc.MediaBag (MediaBag, insertMedia) -import Control.Monad.Except (throwError) -import Text.Pandoc.MIME (MimeType) -import qualified Text.Pandoc.Builder as B -import Codec.Archive.Zip ( Archive (..), toArchiveOrFail, fromEntry - , findEntryByPath, Entry) -import qualified Data.ByteString.Lazy as BL (ByteString) -import System.FilePath ( takeFileName, (</>), dropFileName, normalise - , dropFileName - , splitFileName ) -import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) -import Control.Monad (guard, liftM) -import Data.List (isPrefixOf, isInfixOf) -import Data.Maybe (mapMaybe, fromMaybe) -import qualified Data.Map as M (Map, lookup, fromList, elems) -import Data.Monoid ((<>)) -import Control.DeepSeq (deepseq, NFData) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad) -import qualified Text.Pandoc.Class as P - -type Items = M.Map String (FilePath, MimeType) - -readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc -readEPUB opts bytes = case toArchiveOrFail bytes of - Right archive -> archiveToEPUB opts $ archive - Left _ -> throwError $ PandocParseError "Couldn't extract ePub file" - --- runEPUB :: Except PandocError a -> Either PandocError a --- runEPUB = runExcept - --- Note that internal reference are aggresively normalised so that all ids --- are of the form "filename#id" --- -archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc -archiveToEPUB os archive = do - -- root is path to folder with manifest file in - (root, content) <- getManifest archive - meta <- parseMeta content - (cover, items) <- parseManifest content - -- No need to collapse here as the image path is from the manifest file - let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) - spine <- parseSpine items content - let escapedSpine = map (escapeURI . takeFileName . fst) spine - Pandoc _ bs <- - foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) - `liftM` parseSpineElem root b) mempty spine - let ast = coverDoc <> (Pandoc meta bs) - P.setMediaBag $ fetchImages (M.elems items) root archive ast - return ast - where - os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)} - parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc - parseSpineElem (normalise -> r) (normalise -> path, mime) = do - doc <- mimeToReader mime r path - let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty - return $ docSpan <> doc - mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc - mimeToReader "application/xhtml+xml" (unEscapeString -> root) - (unEscapeString -> path) = do - fname <- findEntryByPathE (root </> path) archive - html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname - return $ fixInternalReferences path html - mimeToReader s _ (unEscapeString -> path) - | s `elem` imageMimes = return $ imageToPandoc path - | otherwise = return $ mempty - --- paths should be absolute when this function is called --- renameImages should do this -fetchImages :: [(FilePath, MimeType)] - -> FilePath -- ^ Root - -> Archive - -> Pandoc - -> MediaBag -fetchImages mimes root arc (query iq -> links) = - foldr (uncurry3 insertMedia) mempty - (mapMaybe getEntry links) - where - getEntry link = - let abslink = normalise (root </> link) in - (link , lookup link mimes, ) . fromEntry - <$> findEntryByPath abslink arc - -iq :: Inline -> [FilePath] -iq (Image _ _ (url, _)) = [url] -iq _ = [] - --- Remove relative paths -renameImages :: FilePath -> Inline -> Inline -renameImages root img@(Image attr a (url, b)) - | "data:" `isPrefixOf` url = img - | otherwise = Image attr a (collapseFilePath (root </> url), b) -renameImages _ x = x - -imageToPandoc :: FilePath -> Pandoc -imageToPandoc s = B.doc . B.para $ B.image s "" mempty - -imageMimes :: [MimeType] -imageMimes = ["image/gif", "image/jpeg", "image/png"] - -type CoverImage = FilePath - -parseManifest :: (PandocMonad m) => Element -> m (Maybe CoverImage, Items) -parseManifest content = do - manifest <- findElementE (dfName "manifest") content - let items = findChildren (dfName "item") manifest - r <- mapM parseItem items - let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover, (M.fromList r)) - where - findCover e = maybe False (isInfixOf "cover-image") - (findAttr (emptyName "properties") e) - parseItem e = do - uid <- findAttrE (emptyName "id") e - href <- findAttrE (emptyName "href") e - mime <- findAttrE (emptyName "media-type") e - return (uid, (href, mime)) - -parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] -parseSpine is e = do - spine <- findElementE (dfName "spine") e - let itemRefs = findChildren (dfName "itemref") spine - mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs - where - parseItemRef ref = do - let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref) - guard linear - findAttr (emptyName "idref") ref - -parseMeta :: PandocMonad m => Element -> m Meta -parseMeta content = do - meta <- findElementE (dfName "metadata") content - let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True - dcspace _ = False - let dcs = filterChildrenName dcspace meta - let r = foldr parseMetaItem nullMeta dcs - return r - --- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem -parseMetaItem :: Element -> Meta -> Meta -parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ strContent e) meta - -renameMeta :: String -> String -renameMeta "creator" = "author" -renameMeta s = s - -getManifest :: PandocMonad m => Archive -> m (String, Element) -getManifest archive = do - metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive - docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) - ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) - as <- liftM ((map attrToPair) . elAttribs) - (findElementE (QName "rootfile" (Just ns) Nothing) docElem) - manifestFile <- mkE "Root not found" (lookup "full-path" as) - let rootdir = dropFileName manifestFile - --mime <- lookup "media-type" as - manifest <- findEntryByPathE manifestFile archive - liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) - --- Fixup - -fixInternalReferences :: FilePath -> Pandoc -> Pandoc -fixInternalReferences pathToFile = - (walk $ renameImages root) - . (walk $ fixBlockIRs filename) - . (walk $ fixInlineIRs filename) - where - (root, escapeURI -> filename) = splitFileName pathToFile - -fixInlineIRs :: String -> Inline -> Inline -fixInlineIRs s (Span as v) = - Span (fixAttrs s as) v -fixInlineIRs s (Code as code) = - Code (fixAttrs s as) code -fixInlineIRs s (Link as is ('#':url, tit)) = - Link (fixAttrs s as) is (addHash s url, tit) -fixInlineIRs s (Link as is t) = - Link (fixAttrs s as) is t -fixInlineIRs _ v = v - -prependHash :: [String] -> Inline -> Inline -prependHash ps l@(Link attr is (url, tit)) - | or [s `isPrefixOf` url | s <- ps] = - Link attr is ('#':url, tit) - | otherwise = l -prependHash _ i = i - -fixBlockIRs :: String -> Block -> Block -fixBlockIRs s (Div as b) = - Div (fixAttrs s as) b -fixBlockIRs s (Header i as b) = - Header i (fixAttrs s as) b -fixBlockIRs s (CodeBlock as code) = - CodeBlock (fixAttrs s as) code -fixBlockIRs _ b = b - -fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) - -addHash :: String -> String -> String -addHash _ "" = "" -addHash s ident = takeFileName s ++ "#" ++ ident - -removeEPUBAttrs :: [(String, String)] -> [(String, String)] -removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs - -isEPUBAttr :: (String, String) -> Bool -isEPUBAttr (k, _) = "epub:" `isPrefixOf` k - --- Library - --- Strict version of foldM -foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a -foldM' _ z [] = return z -foldM' f z (x:xs) = do - z' <- f z x - z' `deepseq` foldM' f z' xs - -uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d -uncurry3 f (a, b, c) = f a b c - --- Utility - -stripNamespace :: QName -> String -stripNamespace (QName v _ _) = v - -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val) -attrToNSPair _ = Nothing - -attrToPair :: Attr -> (String, String) -attrToPair (Attr (QName name _ _) val) = (name, val) - -defaultNameSpace :: Maybe String -defaultNameSpace = Just "http://www.idpf.org/2007/opf" - -dfName :: String -> QName -dfName s = QName s defaultNameSpace Nothing - -emptyName :: String -> QName -emptyName s = QName s Nothing Nothing - --- Convert Maybe interface to Either - -findAttrE :: PandocMonad m => QName -> Element -> m String -findAttrE q e = mkE "findAttr" $ findAttr q e - -findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry -findEntryByPathE (normalise -> path) a = - mkE ("No entry on path: " ++ path) $ findEntryByPath path a - -parseXMLDocE :: PandocMonad m => String -> m Element -parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc - -findElementE :: PandocMonad m => QName -> Element -> m Element -findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x - -mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs deleted file mode 100644 index f02f1a1d4..000000000 --- a/src/Text/Pandoc/Readers/HTML.hs +++ /dev/null @@ -1,1136 +0,0 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, -ViewPatterns#-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of HTML to 'Pandoc' document. --} -module Text.Pandoc.Readers.HTML ( readHtml - , htmlTag - , htmlInBalanced - , isInlineTag - , isBlockTag - , isTextTag - , isCommentTag - ) where - -import Text.HTML.TagSoup -import Text.HTML.TagSoup.Match -import Text.Pandoc.Definition -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) -import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField - , escapeURI, safeRead ) -import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, - Extension (Ext_epub_html_exts, - Ext_raw_html, Ext_native_divs, Ext_native_spans)) -import Text.Pandoc.Logging -import Text.Pandoc.Parsing hiding ((<|>)) -import Text.Pandoc.Walk -import qualified Data.Map as M -import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) -import Data.Char ( isDigit ) -import Control.Monad ( guard, mzero, void, unless ) -import Control.Arrow ((***)) -import Control.Applicative ( (<|>) ) -import Data.Monoid (First (..)) -import Text.TeXMath (readMathML, writeTeX) -import Data.Default (Default (..), def) -import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift) -import Network.URI (URI, parseURIReference, nonStrictRelativeTo) -import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) -import Data.Monoid ((<>)) -import Text.Parsec.Error -import qualified Data.Set as Set -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import Control.Monad.Except (throwError) - --- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> m Pandoc -readHtml opts inp = do - let tags = stripPrefixes . canonicalizeTags $ - parseTagsOptions parseOptions{ optTagPosition = True } inp - parseDoc = do - blocks <- (fixPlains False) . mconcat <$> manyTill block eof - meta <- stateMeta . parserState <$> getState - bs' <- replaceNotes (B.toList blocks) - return $ Pandoc meta bs' - getError (errorMessages -> ms) = case ms of - [] -> "" - (m:_) -> messageString m - result <- flip runReaderT def $ - runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty) - "source" tags - case result of - Right doc -> return doc - Left err -> throwError $ PandocParseError $ getError err - -replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] -replaceNotes = walkM replaceNotes' - -replaceNotes' :: PandocMonad m => Inline -> TagParser m Inline -replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes - where - getNotes = noteTable <$> getState -replaceNotes' x = return x - -data HTMLState = - HTMLState - { parserState :: ParserState, - noteTable :: [(String, Blocks)], - baseHref :: Maybe URI, - identifiers :: Set.Set String, - headerMap :: M.Map Inlines String - } - -data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext - , inChapter :: Bool -- ^ Set if in chapter section - , inPlain :: Bool -- ^ Set if in pPlain - } - -setInChapter :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a -setInChapter = local (\s -> s {inChapter = True}) - -setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a -setInPlain = local (\s -> s {inPlain = True}) - -type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) - -type TagParser m = HTMLParser m [Tag String] - -pBody :: PandocMonad m => TagParser m Blocks -pBody = pInTags "body" block - -pHead :: PandocMonad m => TagParser m Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) - where pTitle = pInTags "title" inline >>= setTitle . trimInlines - setTitle t = mempty <$ (updateState $ B.setMeta "title" t) - pMetaTag = do - mt <- pSatisfy (~== TagOpen "meta" []) - let name = fromAttrib "name" mt - if null name - then return mempty - else do - let content = fromAttrib "content" mt - updateState $ \s -> - let ps = parserState s in - s{ parserState = ps{ - stateMeta = addMetaField name (B.text content) - (stateMeta ps) } } - return mempty - pBaseTag = do - bt <- pSatisfy (~== TagOpen "base" []) - updateState $ \st -> st{ baseHref = - parseURIReference $ fromAttrib "href" bt } - return mempty - -block :: PandocMonad m => TagParser m Blocks -block = do - pos <- getPosition - res <- choice - [ eSection - , eSwitch B.para block - , mempty <$ eFootnote - , mempty <$ eTOC - , mempty <$ eTitlePage - , pPara - , pHeader - , pBlockQuote - , pCodeBlock - , pList - , pHrule - , pTable - , pHead - , pBody - , pDiv - , pPlain - , pRawHtmlBlock - ] - report $ ParsingTrace (take 60 $ show $ B.toList res) pos - return res - -namespaces :: PandocMonad m => [(String, TagParser m Inlines)] -namespaces = [(mathMLNamespace, pMath True)] - -mathMLNamespace :: String -mathMLNamespace = "http://www.w3.org/1998/Math/MathML" - -eSwitch :: (PandocMonad m, Monoid a) - => (Inlines -> a) - -> TagParser m a - -> TagParser m a -eSwitch constructor parser = try $ do - guardEnabled Ext_epub_html_exts - pSatisfy (~== TagOpen "switch" []) - cases <- getFirst . mconcat <$> - manyTill (First <$> (eCase <* skipMany pBlank) ) - (lookAhead $ try $ pSatisfy (~== TagOpen "default" [])) - skipMany pBlank - fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank) - skipMany pBlank - pSatisfy (~== TagClose "switch") - return $ maybe fallback constructor cases - -eCase :: PandocMonad m => TagParser m (Maybe Inlines) -eCase = do - skipMany pBlank - TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" []) - case (flip lookup namespaces) =<< lookup "required-namespace" attr of - Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank)) - Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case")) - -eFootnote :: PandocMonad m => TagParser m () -eFootnote = try $ do - let notes = ["footnote", "rearnote"] - guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag - guard (maybe False (flip elem notes) (lookup "type" attr)) - let ident = fromMaybe "" (lookup "id" attr) - content <- pInTags tag block - addNote ident content - -addNote :: PandocMonad m => String -> Blocks -> TagParser m () -addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)}) - -eNoteref :: PandocMonad m => TagParser m Inlines -eNoteref = try $ do - guardEnabled Ext_epub_html_exts - TagOpen tag attr <- lookAhead $ pAnyTag - guard (maybe False (== "noteref") (lookup "type" attr)) - let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr) - guard (not (null ident)) - pInTags tag block - return $ B.rawInline "noteref" ident - --- Strip TOC if there is one, better to generate again -eTOC :: PandocMonad m => TagParser m () -eTOC = try $ do - guardEnabled Ext_epub_html_exts - (TagOpen tag attr) <- lookAhead $ pAnyTag - guard (maybe False (== "toc") (lookup "type" attr)) - void (pInTags tag block) - -pList :: PandocMonad m => TagParser m Blocks -pList = pBulletList <|> pOrderedList <|> pDefinitionList - -pBulletList :: PandocMonad m => TagParser m Blocks -pBulletList = try $ do - pSatisfy (~== TagOpen "ul" []) - let nonItem = pSatisfy (\t -> - not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ul")) - -- note: if they have an <ol> or <ul> not in scope of a <li>, - -- treat it as a list item, though it's not valid xhtml... - skipMany nonItem - items <- manyTill (pListItem nonItem) (pCloses "ul") - return $ B.bulletList $ map (fixPlains True) items - -pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks -pListItem nonItem = do - TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" []) - let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) - (liDiv <>) <$> pInTags "li" block <* skipMany nonItem - -parseListStyleType :: String -> ListNumberStyle -parseListStyleType "lower-roman" = LowerRoman -parseListStyleType "upper-roman" = UpperRoman -parseListStyleType "lower-alpha" = LowerAlpha -parseListStyleType "upper-alpha" = UpperAlpha -parseListStyleType "decimal" = Decimal -parseListStyleType _ = DefaultStyle - -parseTypeAttr :: String -> ListNumberStyle -parseTypeAttr "i" = LowerRoman -parseTypeAttr "I" = UpperRoman -parseTypeAttr "a" = LowerAlpha -parseTypeAttr "A" = UpperAlpha -parseTypeAttr "1" = Decimal -parseTypeAttr _ = DefaultStyle - -pOrderedList :: PandocMonad m => TagParser m Blocks -pOrderedList = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) - let (start, style) = (sta', sty') - where sta = fromMaybe "1" $ - lookup "start" attribs - sta' = if all isDigit sta - then read sta - else 1 - - pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"] - - typeAttr = fromMaybe "" $ lookup "type" attribs - classAttr = fromMaybe "" $ lookup "class" attribs - styleAttr = fromMaybe "" $ lookup "style" attribs - listStyle = fromMaybe "" $ pickListStyle styleAttr - - sty' = foldOrElse DefaultStyle - [ parseTypeAttr typeAttr - , parseListStyleType classAttr - , parseListStyleType listStyle - ] - let nonItem = pSatisfy (\t -> - not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && - not (t ~== TagClose "ol")) - -- note: if they have an <ol> or <ul> not in scope of a <li>, - -- treat it as a list item, though it's not valid xhtml... - skipMany nonItem - items <- manyTill (pListItem nonItem) (pCloses "ol") - return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items - -pDefinitionList :: PandocMonad m => TagParser m Blocks -pDefinitionList = try $ do - pSatisfy (~== TagOpen "dl" []) - items <- manyTill pDefListItem (pCloses "dl") - return $ B.definitionList items - -pDefListItem :: PandocMonad m => TagParser m (Inlines, [Blocks]) -pDefListItem = try $ do - let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && - not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) - terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) - defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) - skipMany nonItem - let term = foldl1 (\x y -> x <> B.linebreak <> y) terms - return (term, map (fixPlains True) defs) - -fixPlains :: Bool -> Blocks -> Blocks -fixPlains inList bs = if any isParaish bs' - then B.fromList $ map plainToPara bs' - else bs - where isParaish (Para _) = True - isParaish (CodeBlock _ _) = True - isParaish (Header _ _ _) = True - isParaish (BlockQuote _) = True - isParaish (BulletList _) = not inList - isParaish (OrderedList _ _) = not inList - isParaish (DefinitionList _) = not inList - isParaish _ = False - plainToPara (Plain xs) = Para xs - plainToPara x = x - bs' = B.toList bs - -pRawTag :: PandocMonad m => TagParser m String -pRawTag = do - tag <- pAnyTag - let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] - if tagOpen ignorable (const True) tag || tagClose ignorable tag - then return [] - else return $ renderTags' [tag] - -pDiv :: PandocMonad m => TagParser m Blocks -pDiv = try $ do - guardEnabled Ext_native_divs - let isDivLike "div" = True - isDivLike "section" = True - isDivLike _ = False - TagOpen tag attr <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) - contents <- pInTags tag block - let (ident, classes, kvs) = mkAttr attr - let classes' = if tag == "section" - then "section":classes - else classes - return $ B.divWith (ident, classes', kvs) contents - -pRawHtmlBlock :: PandocMonad m => TagParser m Blocks -pRawHtmlBlock = do - raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - exts <- getOption readerExtensions - if extensionEnabled Ext_raw_html exts && not (null raw) - then return $ B.rawBlock "html" raw - else ignore raw - -ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a -ignore raw = do - pos <- getPosition - -- raw can be null for tags like <!DOCTYPE>; see paRawTag - -- in this case we don't want a warning: - unless (null raw) $ - report $ SkippedContent raw pos - return mempty - -pHtmlBlock :: PandocMonad m => String -> TagParser m String -pHtmlBlock t = try $ do - open <- pSatisfy (~== TagOpen t []) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) - return $ renderTags' $ [open] ++ contents ++ [TagClose t] - --- Sets chapter context -eSection :: PandocMonad m => TagParser m Blocks -eSection = try $ do - let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as) - let sectTag = tagOpen (`elem` sectioningContent) matchChapter - TagOpen tag _ <- lookAhead $ pSatisfy sectTag - setInChapter (pInTags tag block) - -headerLevel :: PandocMonad m => String -> TagParser m Int -headerLevel tagtype = do - let level = read (drop 1 tagtype) - (try $ do - guardEnabled Ext_epub_html_exts - asks inChapter >>= guard - return (level - 1)) - <|> - return level - -eTitlePage :: PandocMonad m => TagParser m () -eTitlePage = try $ do - let isTitlePage as = maybe False (isInfixOf "titlepage") (lookup "type" as) - let groupTag = tagOpen (\x -> x `elem` groupingContent || x == "section") - isTitlePage - TagOpen tag _ <- lookAhead $ pSatisfy groupTag - () <$ pInTags tag block - -pHeader :: PandocMonad m => TagParser m Blocks -pHeader = try $ do - TagOpen tagtype attr <- pSatisfy $ - tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) - (const True) - let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] - level <- headerLevel tagtype - contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) - let ident = fromMaybe "" $ lookup "id" attr - let classes = maybe [] words $ lookup "class" attr - let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] - attr' <- registerHeader (ident, classes, keyvals) contents - return $ if bodyTitle - then mempty -- skip a representation of the title in the body - else B.headerWith attr' level contents - -pHrule :: PandocMonad m => TagParser m Blocks -pHrule = do - pSelfClosing (=="hr") (const True) - return B.horizontalRule - -pTable :: PandocMonad m => TagParser m Blocks -pTable = try $ do - TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) - skipMany pBlank - caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank - widths' <- (mconcat <$> many1 pColgroup) <|> many pCol - let pTh = option [] $ pInTags "tr" (pCell "th") - pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") - pTBody = do pOptInTag "tbody" $ many1 pTr - head'' <- pOptInTag "thead" pTh - head' <- pOptInTag "tbody" $ do - if null head'' - then pTh - else return head'' - rowsLs <- many pTBody - rows' <- pOptInTag "tfoot" $ many pTr - TagClose _ <- pSatisfy (~== TagClose "table") - let rows'' = (concat rowsLs) ++ rows' - -- fail on empty table - guard $ not $ null head' && null rows'' - let isSinglePlain x = case B.toList x of - [] -> True - [Plain _] -> True - _ -> False - let isSimple = all isSinglePlain $ concat (head':rows'') - let cols = length $ if null head' then head rows'' else head' - -- add empty cells to short rows - let addEmpties r = case cols - length r of - n | n > 0 -> r ++ replicate n mempty - | otherwise -> r - let rows = map addEmpties rows'' - let aligns = replicate cols AlignDefault - let widths = if null widths' - then if isSimple - then replicate cols 0 - else replicate cols (1.0 / fromIntegral cols) - else widths' - return $ B.table caption (zip aligns widths) head' rows - -pCol :: PandocMonad m => TagParser m Double -pCol = try $ do - TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) - skipMany pBlank - optional $ pSatisfy (~== TagClose "col") - skipMany pBlank - return $ case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead ('0':'.':filter - (`notElem` " \t\r\n%'\";") xs) - _ -> 0.0 - Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead ('0':'.':init x) - _ -> 0.0 - -pColgroup :: PandocMonad m => TagParser m [Double] -pColgroup = try $ do - pSatisfy (~== TagOpen "colgroup" []) - skipMany pBlank - manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank - -noColOrRowSpans :: Tag String -> Bool -noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan" - where isNullOrOne x = case fromAttrib x t of - "" -> True - "1" -> True - _ -> False - -pCell :: PandocMonad m => String -> TagParser m [Blocks] -pCell celltype = try $ do - skipMany pBlank - res <- pInTags' celltype noColOrRowSpans block - skipMany pBlank - return [res] - -pBlockQuote :: PandocMonad m => TagParser m Blocks -pBlockQuote = do - contents <- pInTags "blockquote" block - return $ B.blockQuote $ fixPlains False contents - -pPlain :: PandocMonad m => TagParser m Blocks -pPlain = do - contents <- setInPlain $ trimInlines . mconcat <$> many1 inline - if B.isNull contents - then return mempty - else return $ B.plain contents - -pPara :: PandocMonad m => TagParser m Blocks -pPara = do - contents <- trimInlines <$> pInTags "p" inline - return $ B.para contents - -pCodeBlock :: PandocMonad m => TagParser m Blocks -pCodeBlock = try $ do - TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) - contents <- manyTill pAnyTag (pCloses "pre" <|> eof) - let rawText = concatMap tagToString contents - -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText - -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' - return $ B.codeBlockWith (mkAttr attr) result - -tagToString :: Tag String -> String -tagToString (TagText s) = s -tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" - -inline :: PandocMonad m => TagParser m Inlines -inline = choice - [ eNoteref - , eSwitch id inline - , pTagText - , pQ - , pEmph - , pStrong - , pSuperscript - , pSubscript - , pStrikeout - , pLineBreak - , pLink - , pImage - , pCode - , pSpan - , pMath False - , pRawHtmlInline - ] - -pLocation :: PandocMonad m => TagParser m () -pLocation = do - (TagPosition r c) <- pSat isTagPosition - setPosition $ newPos "input" r c - -pSat :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) -pSat f = do - pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) - -pSatisfy :: PandocMonad m => (Tag String -> Bool) -> TagParser m (Tag String) -pSatisfy f = try $ optional pLocation >> pSat f - -pAnyTag :: PandocMonad m => TagParser m (Tag String) -pAnyTag = pSatisfy (const True) - -pSelfClosing :: PandocMonad m - => (String -> Bool) -> ([Attribute String] -> Bool) - -> TagParser m (Tag String) -pSelfClosing f g = do - open <- pSatisfy (tagOpen f g) - optional $ pSatisfy (tagClose f) - return open - -pQ :: PandocMonad m => TagParser m Inlines -pQ = do - context <- asks quoteContext - let quoteType = case context of - InDoubleQuote -> SingleQuote - _ -> DoubleQuote - let innerQuoteContext = if quoteType == SingleQuote - then InSingleQuote - else InDoubleQuote - let constructor = case quoteType of - SingleQuote -> B.singleQuoted - DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ - pInlinesInTags "q" constructor - -pEmph :: PandocMonad m => TagParser m Inlines -pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph - -pStrong :: PandocMonad m => TagParser m Inlines -pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong - -pSuperscript :: PandocMonad m => TagParser m Inlines -pSuperscript = pInlinesInTags "sup" B.superscript - -pSubscript :: PandocMonad m => TagParser m Inlines -pSubscript = pInlinesInTags "sub" B.subscript - -pStrikeout :: PandocMonad m => TagParser m Inlines -pStrikeout = do - pInlinesInTags "s" B.strikeout <|> - pInlinesInTags "strike" B.strikeout <|> - pInlinesInTags "del" B.strikeout <|> - try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) - contents <- mconcat <$> manyTill inline (pCloses "span") - return $ B.strikeout contents) - -pLineBreak :: PandocMonad m => TagParser m Inlines -pLineBreak = do - pSelfClosing (=="br") (const True) - return B.linebreak - --- Unlike fromAttrib from tagsoup, this distinguishes --- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag String -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs -maybeFromAttrib _ _ = Nothing - -pLink :: PandocMonad m => TagParser m Inlines -pLink = try $ do - tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = fromAttrib "title" tag - -- take id from id attribute if present, otherwise name - let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag - let cls = words $ fromAttrib "class" tag - lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - -- check for href; if href, then a link, otherwise a span - case maybeFromAttrib "href" tag of - Nothing -> - return $ B.spanWith (uid, cls, []) lab - Just url' -> do - mbBaseHref <- baseHref <$> getState - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> - show (rel `nonStrictRelativeTo` bs) - _ -> url' - return $ B.linkWith (uid, cls, []) (escapeURI url) title lab - -pImage :: PandocMonad m => TagParser m Inlines -pImage = do - tag <- pSelfClosing (=="img") (isJust . lookup "src") - mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "src" tag - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) - _ -> url' - let title = fromAttrib "title" tag - let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = words $ fromAttrib "class" tag - let getAtt k = case fromAttrib k tag of - "" -> [] - v -> [(k, v)] - let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] - return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) - -pCode :: PandocMonad m => TagParser m Inlines -pCode = try $ do - (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) - result <- manyTill pAnyTag (pCloses open) - return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result - -pSpan :: PandocMonad m => TagParser m Inlines -pSpan = try $ do - guardEnabled Ext_native_spans - TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) - contents <- pInTags "span" inline - let isSmallCaps = fontVariant == "small-caps" - where styleAttr = fromMaybe "" $ lookup "style" attr - fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr - let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) - return $ tag contents - -pRawHtmlInline :: PandocMonad m => TagParser m Inlines -pRawHtmlInline = do - inplain <- asks inPlain - result <- pSatisfy (tagComment (const True)) - <|> if inplain - then pSatisfy (not . isBlockTag) - else pSatisfy isInlineTag - exts <- getOption readerExtensions - let raw = renderTags' [result] - if extensionEnabled Ext_raw_html exts - then return $ B.rawInline "html" raw - else ignore raw - -mathMLToTeXMath :: String -> Either String String -mathMLToTeXMath s = writeTeX <$> readMathML s - -pMath :: PandocMonad m => Bool -> TagParser m Inlines -pMath inCase = try $ do - open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True) - -- we'll assume math tags are MathML unless specially marked - -- otherwise... - unless inCase $ - guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) - contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math")) - case mathMLToTeXMath (renderTags $ [open] ++ contents ++ [TagClose "math"]) of - Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - innerText contents - Right [] -> return mempty - Right x -> return $ case lookup "display" attr of - Just "block" -> B.displayMath x - _ -> B.math x - -pInlinesInTags :: PandocMonad m => String -> (Inlines -> Inlines) - -> TagParser m Inlines -pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline - -pInTags :: (PandocMonad m, Monoid a) => String -> TagParser m a -> TagParser m a -pInTags tagtype parser = pInTags' tagtype (const True) parser - -pInTags' :: (PandocMonad m, Monoid a) - => String - -> (Tag String -> Bool) - -> TagParser m a - -> TagParser m a -pInTags' tagtype tagtest parser = try $ do - pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t) - mconcat <$> manyTill parser (pCloses tagtype <|> eof) - --- parses p, preceeded by an optional opening tag --- and followed by an optional closing tags -pOptInTag :: PandocMonad m => String -> TagParser m a -> TagParser m a -pOptInTag tagtype p = try $ do - skipMany pBlank - optional $ pSatisfy (~== TagOpen tagtype []) - skipMany pBlank - x <- p - skipMany pBlank - optional $ pSatisfy (~== TagClose tagtype) - skipMany pBlank - return x - -pCloses :: PandocMonad m => String -> TagParser m () -pCloses tagtype = try $ do - t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag - case t of - (TagClose t') | t' == tagtype -> pAnyTag >> return () - (TagOpen t' _) | t' `closes` tagtype -> return () - (TagClose "ul") | tagtype == "li" -> return () - (TagClose "ol") | tagtype == "li" -> return () - (TagClose "dl") | tagtype == "dd" -> return () - (TagClose "table") | tagtype == "td" -> return () - (TagClose "table") | tagtype == "tr" -> return () - _ -> mzero - -pTagText :: PandocMonad m => TagParser m Inlines -pTagText = try $ do - (TagText str) <- pSatisfy isTagText - st <- getState - qu <- ask - parsed <- lift $ lift $ - flip runReaderT qu $ runParserT (many pTagContents) st "text" str - case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" ++ str ++ "'" - Right result -> return $ mconcat result - -pBlank :: PandocMonad m => TagParser m () -pBlank = try $ do - (TagText str) <- pSatisfy isTagText - guard $ all isSpace str - -type InlinesParser m = HTMLParser m String - -pTagContents :: PandocMonad m => InlinesParser m Inlines -pTagContents = - B.displayMath <$> mathDisplay - <|> B.math <$> mathInline - <|> pStr - <|> pSpace - <|> smartPunctuation pTagContents - <|> pSymbol - <|> pBad - -pStr :: PandocMonad m => InlinesParser m Inlines -pStr = do - result <- many1 $ satisfy $ \c -> - not (isSpace c) && not (isSpecial c) && not (isBad c) - updateLastStrPos - return $ B.str result - -isSpecial :: Char -> Bool -isSpecial '"' = True -isSpecial '\'' = True -isSpecial '.' = True -isSpecial '-' = True -isSpecial '$' = True -isSpecial '\8216' = True -isSpecial '\8217' = True -isSpecial '\8220' = True -isSpecial '\8221' = True -isSpecial _ = False - -pSymbol :: PandocMonad m => InlinesParser m Inlines -pSymbol = satisfy isSpecial >>= return . B.str . (:[]) - -isBad :: Char -> Bool -isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML - -pBad :: PandocMonad m => InlinesParser m Inlines -pBad = do - c <- satisfy isBad - let c' = case c of - '\128' -> '\8364' - '\130' -> '\8218' - '\131' -> '\402' - '\132' -> '\8222' - '\133' -> '\8230' - '\134' -> '\8224' - '\135' -> '\8225' - '\136' -> '\710' - '\137' -> '\8240' - '\138' -> '\352' - '\139' -> '\8249' - '\140' -> '\338' - '\142' -> '\381' - '\145' -> '\8216' - '\146' -> '\8217' - '\147' -> '\8220' - '\148' -> '\8221' - '\149' -> '\8226' - '\150' -> '\8211' - '\151' -> '\8212' - '\152' -> '\732' - '\153' -> '\8482' - '\154' -> '\353' - '\155' -> '\8250' - '\156' -> '\339' - '\158' -> '\382' - '\159' -> '\376' - _ -> '?' - return $ B.str [c'] - -pSpace :: PandocMonad m => InlinesParser m Inlines -pSpace = many1 (satisfy isSpace) >>= \xs -> - if '\n' `elem` xs - then return B.softbreak - else return B.space - --- --- Constants --- - -eitherBlockOrInline :: [String] -eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed", - "del", "ins", - "progress", "map", "area", "noscript", "script", - "object", "svg", "video", "source"] - -{- -inlineHtmlTags :: [[Char]] -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] --} - -blockHtmlTags :: [String] -blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", - "blockquote", "body", "button", "canvas", - "caption", "center", "col", "colgroup", "dd", "dir", "div", - "dl", "dt", "fieldset", "figcaption", "figure", - "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "head", "header", "hgroup", "hr", "html", - "isindex", "menu", "noframes", "ol", "output", "p", "pre", - "section", "table", "tbody", "textarea", - "thead", "tfoot", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style"] - --- We want to allow raw docbook in markdown documents, so we --- include docbook block tags here too. -blockDocBookTags :: [String] -blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", - "orderedlist", "segmentedlist", "simplelist", - "variablelist", "caution", "important", "note", "tip", - "warning", "address", "literallayout", "programlisting", - "programlistingco", "screen", "screenco", "screenshot", - "synopsis", "example", "informalexample", "figure", - "informalfigure", "table", "informaltable", "para", - "simpara", "formalpara", "equation", "informalequation", - "figure", "screenshot", "mediaobject", "qandaset", - "procedure", "task", "cmdsynopsis", "funcsynopsis", - "classsynopsis", "blockquote", "epigraph", "msgset", - "sidebar", "title"] - -epubTags :: [String] -epubTags = ["case", "switch", "default"] - -blockTags :: [String] -blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags - -isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen isInlineTagName (const True) t || - tagClose isInlineTagName t || - tagComment (const True) t - where isInlineTagName x = x `notElem` blockTags - -isBlockTag :: Tag String -> Bool -isBlockTag t = tagOpen isBlockTagName (const True) t || - tagClose isBlockTagName t || - tagComment (const True) t - where isBlockTagName ('?':_) = True - isBlockTagName ('!':_) = True - isBlockTagName x = x `elem` blockTags - || x `elem` eitherBlockOrInline - -isTextTag :: Tag String -> Bool -isTextTag = tagText (const True) - -isCommentTag :: Tag String -> Bool -isCommentTag = tagComment (const True) - --- taken from HXT and extended --- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags -closes :: String -> String -> Bool -_ `closes` "body" = False -_ `closes` "html" = False -"body" `closes` "head" = True -"a" `closes` "a" = True -"li" `closes` "li" = True -"th" `closes` t | t `elem` ["th","td"] = True -"tr" `closes` t | t `elem` ["th","td","tr"] = True -"dd" `closes` t | t `elem` ["dt", "dd"] = True -"dt" `closes` t | t `elem` ["dt","dd"] = True -"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True -"optgroup" `closes` "optgroup" = True -"optgroup" `closes` "option" = True -"option" `closes` "option" = True --- http://www.w3.org/TR/html-markup/p.html -x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote", - "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section", - "table", "ul"] = True -"meta" `closes` "meta" = True -"form" `closes` "form" = True -"label" `closes` "label" = True -"map" `closes` "map" = True -"object" `closes` "object" = True -_ `closes` t | t `elem` ["option","style","script","textarea","title"] = True -t `closes` "select" | t /= "option" = True -"thead" `closes` t | t `elem` ["colgroup"] = True -"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True -"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True -t `closes` t2 | - t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && - t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" -t1 `closes` t2 | - t1 `elem` blockTags && - t2 `notElem` (blockTags ++ eitherBlockOrInline) = True -_ `closes` _ = False - ---- parsers for use in markdown, textile readers - --- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Monad m) - => (Tag String -> Bool) - -> ParserT String st m String -htmlInBalanced f = try $ do - lookAhead (char '<') - inp <- getInput - let ts = canonicalizeTags $ - parseTagsOptions parseOptions{ optTagWarning = True, - optTagPosition = True } inp - case ts of - (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do - guard $ f t - guard $ not $ hasTagWarning (t : take 1 rest) - case htmlInBalanced' tn (t:rest) of - [] -> mzero - xs -> case reverse xs of - (TagClose _ : TagPosition er ec : _) -> do - let ls = er - sr - let cs = ec - sc - lscontents <- unlines <$> count ls anyLine - cscontents <- count cs anyChar - (_,closetag) <- htmlTag (~== TagClose tn) - return (lscontents ++ cscontents ++ closetag) - _ -> mzero - _ -> mzero - -htmlInBalanced' :: String - -> [Tag String] - -> [Tag String] -htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts - where go :: Int -> [Tag String] -> Maybe [Tag String] - go n (t@(TagOpen tn' _):rest) | tn' == tagname = - (t :) <$> go (n + 1) rest - go 1 (t@(TagClose tn'):_) | tn' == tagname = - return [t] - go n (t@(TagClose tn'):rest) | tn' == tagname = - (t :) <$> go (n - 1) rest - go n (t:ts') = (t :) <$> go n ts' - go _ [] = mzero - -hasTagWarning :: [Tag String] -> Bool -hasTagWarning (TagWarning _:_) = True -hasTagWarning _ = False - --- | Matches a tag meeting a certain condition. -htmlTag :: Monad m - => (Tag String -> Bool) - -> ParserT [Char] st m (Tag String, String) -htmlTag f = try $ do - lookAhead (char '<') - inp <- getInput - let (next : _) = canonicalizeTags $ parseTagsOptions - parseOptions{ optTagWarning = False } inp - guard $ f next - let handleTag tagname = do - -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> - -- should NOT be parsed as an HTML tag, see #2277 - guard $ not ('.' `elem` tagname) - -- <https://example.org> should NOT be a tag either. - -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ not (null tagname) - guard $ last tagname /= ':' - rendered <- manyTill anyChar (char '>') - return (next, rendered ++ ">") - case next of - TagComment s - | "<!--" `isPrefixOf` inp -> do - count (length s + 4) anyChar - skipMany (satisfy (/='>')) - char '>' - return (next, "<!--" ++ s ++ "-->") - | otherwise -> fail "bogus comment mode, HTML5 parse error" - TagOpen tagname _attr -> handleTag tagname - TagClose tagname -> handleTag tagname - _ -> mzero - -mkAttr :: [(String, String)] -> Attr -mkAttr attr = (attribsId, attribsClasses, attribsKV) - where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes - attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr - --- Strip namespace prefixes -stripPrefixes :: [Tag String] -> [Tag String] -stripPrefixes = map stripPrefix - -stripPrefix :: Tag String -> Tag String -stripPrefix (TagOpen s as) = - TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as) -stripPrefix (TagClose s) = TagClose (stripPrefix' s) -stripPrefix x = x - -stripPrefix' :: String -> String -stripPrefix' s = - case span (/= ':') s of - (_, "") -> s - (_, (_:ts)) -> ts - -isSpace :: Char -> Bool -isSpace ' ' = True -isSpace '\t' = True -isSpace '\n' = True -isSpace '\r' = True -isSpace _ = False - --- Instances - -instance HasIdentifierList HTMLState where - extractIdentifierList = identifiers - updateIdentifierList f s = s{ identifiers = f (identifiers s) } - -instance HasHeaderMap HTMLState where - extractHeaderMap = headerMap - updateHeaderMap f s = s{ headerMap = f (headerMap s) } - --- This signature should be more general --- MonadReader HTMLLocal m => HasQuoteContext st m -instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where - getQuoteContext = asks quoteContext - withQuoteContext q = local (\s -> s{quoteContext = q}) - -instance HasReaderOptions HTMLState where - extractReaderOptions = extractReaderOptions . parserState - -instance HasMeta HTMLState where - setMeta s b st = st {parserState = setMeta s b $ parserState st} - deleteMeta s st = st {parserState = deleteMeta s $ parserState st} - -instance Default HTMLLocal where - def = HTMLLocal NoQuote False False - -instance HasLastStrPosition HTMLState where - setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} - getLastStrPos = getLastStrPos . parserState - - --- EPUB Specific --- --- -sectioningContent :: [String] -sectioningContent = ["article", "aside", "nav", "section"] - - -groupingContent :: [String] -groupingContent = ["p", "hr", "pre", "blockquote", "ol" - , "ul", "li", "dl", "dt", "dt", "dd" - , "figure", "figcaption", "div", "main"] - - -{- - -types :: [(String, ([String], Int))] -types = -- Document divisions - map (\s -> (s, (["section", "body"], 0))) - ["volume", "part", "chapter", "division"] - ++ -- Document section and components - [ - ("abstract", ([], 0))] --} diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs deleted file mode 100644 index 310a04574..000000000 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ /dev/null @@ -1,160 +0,0 @@ -{-# LANGUAGE CPP #-} -{- | - Module : Text.Pandoc.Readers.Haddock - Copyright : Copyright (C) 2013 David Lazar - License : GNU GPL, version 2 or above - - Maintainer : David Lazar <lazar6@illinois.edu>, - John MacFarlane <jgm@berkeley.edu> - Stability : alpha - -Conversion of Haddock markup to 'Pandoc' document. --} -module Text.Pandoc.Readers.Haddock - ( readHaddock - ) where - -import Text.Pandoc.Builder (Blocks, Inlines) -import qualified Text.Pandoc.Builder as B -import Data.Monoid ((<>)) -import Text.Pandoc.Shared (trim, splitBy) -import Data.List (intersperse, stripPrefix) -import Data.Maybe (fromMaybe) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Documentation.Haddock.Parser -import Documentation.Haddock.Types -import Text.Pandoc.Error -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad) - - --- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: PandocMonad m - => ReaderOptions - -> String - -> m Pandoc -readHaddock opts s = case readHaddockEither opts s of - Right result -> return result - Left e -> throwError e - -readHaddockEither :: ReaderOptions -- ^ Reader options - -> String -- ^ String to parse - -> Either PandocError Pandoc -readHaddockEither _opts = -#if MIN_VERSION_haddock_library(1,2,0) - Right . B.doc . docHToBlocks . _doc . parseParas -#else - Right . B.doc . docHToBlocks . parseParas -#endif - -docHToBlocks :: DocH String Identifier -> Blocks -docHToBlocks d' = - case d' of - DocEmpty -> mempty - DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> - B.headerWith (ident,[],[]) (headerLevel h) - (docHToInlines False $ headerTitle h) - DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) - DocString _ -> inlineFallback - DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h - DocParagraph x -> B.para $ docHToInlines False x - DocIdentifier _ -> inlineFallback - DocIdentifierUnchecked _ -> inlineFallback - DocModule s -> B.plain $ docHToInlines False $ DocModule s - DocWarning _ -> mempty -- TODO - DocEmphasis _ -> inlineFallback - DocMonospaced _ -> inlineFallback - DocBold _ -> inlineFallback -#if MIN_VERSION_haddock_library(1,4,0) - DocMathInline _ -> inlineFallback - DocMathDisplay _ -> inlineFallback -#endif - DocHeader h -> B.header (headerLevel h) - (docHToInlines False $ headerTitle h) - DocUnorderedList items -> B.bulletList (map docHToBlocks items) - DocOrderedList items -> B.orderedList (map docHToBlocks items) - DocDefList items -> B.definitionList (map (\(d,t) -> - (docHToInlines False d, - [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s - DocCodeBlock d -> B.para $ docHToInlines True d - DocHyperlink _ -> inlineFallback - DocPic _ -> inlineFallback - DocAName _ -> inlineFallback - DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) - DocExamples es -> mconcat $ map (\e -> - makeExample ">>>" (exampleExpression e) (exampleResult e)) es - - where inlineFallback = B.plain $ docHToInlines False d' - consolidatePlains = B.fromList . consolidatePlains' . B.toList - consolidatePlains' zs@(Plain _ : _) = - let (xs, ys) = span isPlain zs in - Para (concatMap extractContents xs) : consolidatePlains' ys - consolidatePlains' (x : xs) = x : consolidatePlains' xs - consolidatePlains' [] = [] - isPlain (Plain _) = True - isPlain _ = False - extractContents (Plain xs) = xs - extractContents _ = [] - -docHToInlines :: Bool -> DocH String Identifier -> Inlines -docHToInlines isCode d' = - case d' of - DocEmpty -> mempty - DocAppend d1 d2 -> mappend (docHToInlines isCode d1) - (docHToInlines isCode d2) - DocString s - | isCode -> mconcat $ intersperse B.linebreak - $ map B.code $ splitBy (=='\n') s - | otherwise -> B.text s - DocParagraph _ -> mempty - DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s - DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s - DocModule s -> B.codeWith ("",["haskell","module"],[]) s - DocWarning _ -> mempty -- TODO - DocEmphasis d -> B.emph (docHToInlines isCode d) - DocMonospaced (DocString s) -> B.code s - DocMonospaced d -> docHToInlines True d - DocBold d -> B.strong (docHToInlines isCode d) -#if MIN_VERSION_haddock_library(1,4,0) - DocMathInline s -> B.math s - DocMathDisplay s -> B.displayMath s -#endif - DocHeader _ -> mempty - DocUnorderedList _ -> mempty - DocOrderedList _ -> mempty - DocDefList _ -> mempty - DocCodeBlock _ -> mempty - DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) - (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h) - DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) - (maybe mempty B.text $ pictureTitle p) - DocAName s -> B.spanWith (s,["anchor"],[]) mempty - DocProperty _ -> mempty - DocExamples _ -> mempty - --- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks -makeExample prompt expression result = - B.para $ B.codeWith ("",["prompt"],[]) prompt - <> B.space - <> B.codeWith ([], ["haskell","expr"], []) (trim expression) - <> B.linebreak - <> (mconcat $ intersperse B.linebreak $ map coder result') - where - -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile (`elem` " \t") prompt - - -- 2. drop, if possible, the exact same sequence of whitespace - -- characters from each result line - -- - -- 3. interpret lines that only contain the string "<BLANKLINE>" as an - -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result - where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - - substituteBlankLine "<BLANKLINE>" = "" - substituteBlankLine line = line - coder = B.codeWith ([], ["result"], []) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs deleted file mode 100644 index 9f9a79535..000000000 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ /dev/null @@ -1,1437 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of LaTeX to 'Pandoc' document. --} -module Text.Pandoc.Readers.LaTeX ( readLaTeX, - rawLaTeXInline, - rawLaTeXBlock, - inlineCommand, - ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Shared -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, - mathDisplay, mathInline) -import Data.Char ( chr, ord, isLetter, isAlphaNum ) -import Control.Monad -import Text.Pandoc.Builder -import Control.Applicative ((<|>), many, optional) -import Data.Maybe (fromMaybe, maybeToList) -import System.FilePath (replaceExtension, takeExtension, addExtension) -import Data.List (intercalate) -import qualified Data.Map as M -import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) -import Text.Pandoc.ImageSize (numUnit, showFl) -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, report, - readFileFromDirs) - --- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> m Pandoc -readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx - case parsed of - Right result -> return result - Left e -> throwError e - -parseLaTeX :: PandocMonad m => LP m Pandoc -parseLaTeX = do - bs <- blocks - eof - st <- getState - let meta = stateMeta st - let (Pandoc _ bs') = doc bs - return $ Pandoc meta bs' - -type LP m = ParserT String ParserState m - -anyControlSeq :: PandocMonad m => LP m String -anyControlSeq = do - char '\\' - next <- option '\n' anyChar - case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - -controlSeq :: PandocMonad m => String -> LP m String -controlSeq name = try $ do - char '\\' - case name of - "" -> mzero - [c] | not (isLetter c) -> string [c] - cs -> string cs <* notFollowedBy letter <* optional sp - return name - -dimenarg :: PandocMonad m => LP m String -dimenarg = try $ do - ch <- option "" $ string "=" - num <- many1 digit - dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] - return $ ch ++ num ++ dim - -sp :: PandocMonad m => LP m () -sp = whitespace <|> endline - -whitespace :: PandocMonad m => LP m () -whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - -endline :: PandocMonad m => LP m () -endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline) - -isLowerHex :: Char -> Bool -isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' - -tildeEscape :: PandocMonad m => LP m Char -tildeEscape = try $ do - string "^^" - c <- satisfy (\x -> x >= '\0' && x <= '\128') - d <- if isLowerHex c - then option "" $ count 1 (satisfy isLowerHex) - else return "" - if null d - then case ord c of - x | x >= 64 && x <= 127 -> return $ chr (x - 64) - | otherwise -> return $ chr (x + 64) - else return $ chr $ read ('0':'x':c:d) - -comment :: PandocMonad m => LP m () -comment = do - char '%' - skipMany (satisfy (/='\n')) - optional newline - return () - -bgroup :: PandocMonad m => LP m () -bgroup = try $ do - skipMany (spaceChar <|> try (newline <* notFollowedBy blankline)) - () <$ char '{' - <|> () <$ controlSeq "bgroup" - <|> () <$ controlSeq "begingroup" - -egroup :: PandocMonad m => LP m () -egroup = () <$ char '}' - <|> () <$ controlSeq "egroup" - <|> () <$ controlSeq "endgroup" - -grouped :: PandocMonad m => Monoid a => LP m a -> LP m a -grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup) - -braced :: PandocMonad m => LP m String -braced = bgroup *> (concat <$> manyTill - ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) - <|> try (string "\\}") - <|> try (string "\\{") - <|> try (string "\\\\") - <|> ((\x -> "{" ++ x ++ "}") <$> braced) - <|> count 1 anyChar - ) egroup) - -bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a -bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) - -mathDisplay :: PandocMonad m => LP m String -> LP m Inlines -mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) - -mathInline :: PandocMonad m => LP m String -> LP m Inlines -mathInline p = math <$> (try p >>= applyMacros') - -mathChars :: PandocMonad m => LP m String -mathChars = - concat <$> many (escapedChar - <|> (snd <$> withRaw braced) - <|> many1 (satisfy isOrdChar)) - where escapedChar = try $ do char '\\' - c <- anyChar - return ['\\',c] - isOrdChar '$' = False - isOrdChar '{' = False - isOrdChar '}' = False - isOrdChar '\\' = False - isOrdChar _ = True - -quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines -quoted' f starter ender = do - startchs <- starter - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then do - ils <- many (notFollowedBy ender >> inline) - (ender >> return (f (mconcat ils))) <|> - (<> mconcat ils) <$> - lit (case startchs of - "``" -> "“" - "`" -> "‘" - _ -> startchs) - else lit startchs - -doubleQuote :: PandocMonad m => LP m Inlines -doubleQuote = do - quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") - <|> quoted' doubleQuoted (string "“") (void $ char '”') - -- the following is used by babel for localized quotes: - <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") - <|> quoted' doubleQuoted (string "\"") (void $ char '"') - -singleQuote :: PandocMonad m => LP m Inlines -singleQuote = do - smart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if smart - then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) - <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - else str <$> many1 (oneOf "`\'‘’") - -inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> inlineText - <|> inlineCommand - <|> inlineEnvironment - <|> inlineGroup - <|> (char '-' *> option (str "-") - (char '-' *> option (str "–") (str "—" <$ char '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (string "''")) - <|> (str "”" <$ char '”') - <|> (str "’" <$ char '\'') - <|> (str "’" <$ char '’') - <|> (str "\160" <$ char '~') - <|> mathDisplay (string "$$" *> mathChars <* string "$$") - <|> mathInline (char '$' *> mathChars <* char '$') - <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) - <|> (str . (:[]) <$> tildeEscape) - <|> (do res <- oneOf "#&~^'`\"[]" - pos <- getPosition - report $ ParsingUnescaped [res] pos - return $ str [res]) - -inlines :: PandocMonad m => LP m Inlines -inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) - -inlineGroup :: PandocMonad m => LP m Inlines -inlineGroup = do - ils <- grouped inline - if isNull ils - then return mempty - else return $ spanWith nullAttr ils - -- we need the span so we can detitlecase bibtex entries; - -- we need to know when something is {C}apitalized - -block :: PandocMonad m => LP m Blocks -block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) - <|> environment - <|> include - <|> macro - <|> blockCommand - <|> paragraph - <|> grouped block - <|> (mempty <$ char '&') -- loose & in table environment - - -blocks :: PandocMonad m => LP m Blocks -blocks = mconcat <$> many block - -getRawCommand :: PandocMonad m => String -> LP m String -getRawCommand name' = do - rawargs <- withRaw (many (try (optional sp *> opt)) *> - option "" (try (optional sp *> dimenarg)) *> - many braced) - return $ '\\' : name' ++ snd rawargs - -lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v -lookupListDefault d = (fromMaybe d .) . lookupList - where - lookupList l m = msum $ map (`M.lookup` m) l - -blockCommand :: PandocMonad m => LP m Blocks -blockCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - star <- option "" (string "*" <* optional sp) - let name' = name ++ star - let raw = do - rawcommand <- getRawCommand name' - transformed <- applyMacros' rawcommand - guard $ transformed /= rawcommand - notFollowedBy $ parseFromString inlines transformed - parseFromString blocks transformed - lookupListDefault raw [name',name] blockCommands - -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" - --- eat an optional argument and one or more arguments in braces -ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines) -ignoreInlines name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawInline "latex" rawCommand) - doraw <|> ignore rawCommand - -guardRaw :: PandocMonad m => LP m () -guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex - -optargs :: PandocMonad m => LP m String -optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced)) - -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a -ignore raw = do - pos <- getPosition - report $ SkippedContent raw pos - return mempty - -ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) -ignoreBlocks name = (name, p) - where - p = do oa <- optargs - let rawCommand = '\\':name ++ oa - let doraw = guardRaw >> return (rawBlock "latex" rawCommand) - doraw <|> ignore rawCommand - -blockCommands :: PandocMonad m => M.Map String (LP m Blocks) -blockCommands = M.fromList $ - [ ("par", mempty <$ skipopts) - , ("parbox", braced >> grouped blocks) - , ("title", mempty <$ (skipopts *> - (grouped inline >>= addMeta "title") - <|> (grouped block >>= addMeta "title"))) - , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) - , ("author", mempty <$ (skipopts *> authors)) - -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) - , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - -- sectioning - , ("chapter", updateState (\s -> s{ stateHasChapters = True }) - *> section nullAttr 0) - , ("chapter*", updateState (\s -> s{ stateHasChapters = True }) - *> section ("",["unnumbered"],[]) 0) - , ("section", section nullAttr 1) - , ("section*", section ("",["unnumbered"],[]) 1) - , ("subsection", section nullAttr 2) - , ("subsection*", section ("",["unnumbered"],[]) 2) - , ("subsubsection", section nullAttr 3) - , ("subsubsection*", section ("",["unnumbered"],[]) 3) - , ("paragraph", section nullAttr 4) - , ("paragraph*", section ("",["unnumbered"],[]) 4) - , ("subparagraph", section nullAttr 5) - , ("subparagraph*", section ("",["unnumbered"],[]) 5) - -- beamer slides - , ("frametitle", section nullAttr 3) - , ("framesubtitle", section nullAttr 4) - -- letters - , ("opening", (para . trimInlines) <$> (skipopts *> tok)) - , ("closing", skipopts *> closing) - -- - , ("hrule", pure horizontalRule) - , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> looseItem) - , ("documentclass", skipopts *> braced *> preamble) - , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", skipopts *> setCaption) - , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs)) - -- includes - , ("lstinputlisting", inputListing) - ] ++ map ignoreBlocks - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks - [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" - -- newcommand, etc. should be parsed by macro, but we need this - -- here so these aren't parsed as inline commands to ignore - , "special", "pdfannot", "pdfstringdef" - , "bibliographystyle" - , "maketitle", "makeindex", "makeglossary" - , "addcontentsline", "addtocontents", "addtocounter" - -- \ignore{} is used conventionally in literate haskell for definitions - -- that are to be processed by the compiler but not printed. - , "ignore" - , "hyperdef" - , "markboth", "markright", "markleft" - , "newpage" - ] - -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ stateMeta = addMetaField field val $ stateMeta st } - -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') - -setCaption :: PandocMonad m => LP m Blocks -setCaption = do - ils <- tok - mblabel <- option Nothing $ - try $ spaces' >> controlSeq "label" >> (Just <$> tok) - let ils' = case mblabel of - Just lab -> ils <> spanWith - ("",[],[("data-label", stringify lab)]) mempty - Nothing -> ils - updateState $ \st -> st{ stateCaption = Just ils' } - return mempty - -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ stateCaption = Nothing } - -authors :: PandocMonad m => LP m () -authors = try $ do - char '{' - let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> - (inline <|> mempty <$ blockCommand)) - -- skip e.g. \vspace{10pt} - auths <- sepBy oneAuthor (controlSeq "and") - char '}' - addMeta "author" (map trimInlines auths) - -section :: PandocMonad m => Attr -> Int -> LP m Blocks -section (ident, classes, kvs) lvl = do - hasChapters <- stateHasChapters `fmap` getState - let lvl' = if hasChapters then lvl + 1 else lvl - skipopts - contents <- grouped inline - lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) - attr' <- registerHeader (lab, classes, kvs) contents - return $ headerWith attr' lvl' contents - -inlineCommand :: PandocMonad m => LP m Inlines -inlineCommand = try $ do - name <- anyControlSeq - guard $ name /= "begin" && name /= "end" - guard $ not $ isBlockCommand name - exts <- getOption readerExtensions - star <- option "" (string "*") - let name' = name ++ star - let raw = do - rawargs <- withRaw - (skipangles *> skipopts *> option "" dimenarg *> many braced) - let rawcommand = '\\' : name ++ star ++ snd rawargs - transformed <- applyMacros' rawcommand - if transformed /= rawcommand - then parseFromString inlines transformed - else if extensionEnabled Ext_raw_tex exts - then return $ rawInline "latex" rawcommand - else ignore rawcommand - (lookupListDefault mzero [name',name] inlineCommands <* - optional (try (string "{}"))) - <|> raw - -unlessParseRaw :: PandocMonad m => LP m () -unlessParseRaw = getOption readerExtensions >>= - guard . not . extensionEnabled Ext_raw_tex - -isBlockCommand :: String -> Bool -isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) - - -inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines) -inlineEnvironments = M.fromList - [ ("displaymath", mathEnv id Nothing "displaymath") - , ("math", math <$> verbEnv "math") - , ("equation", mathEnv id Nothing "equation") - , ("equation*", mathEnv id Nothing "equation*") - , ("gather", mathEnv id (Just "gathered") "gather") - , ("gather*", mathEnv id (Just "gathered") "gather*") - , ("multline", mathEnv id (Just "gathered") "multline") - , ("multline*", mathEnv id (Just "gathered") "multline*") - , ("eqnarray", mathEnv id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*") - , ("align", mathEnv id (Just "aligned") "align") - , ("align*", mathEnv id (Just "aligned") "align*") - , ("alignat", mathEnv id (Just "aligned") "alignat") - , ("alignat*", mathEnv id (Just "aligned") "alignat*") - ] - -inlineCommands :: PandocMonad m => M.Map String (LP m Inlines) -inlineCommands = M.fromList $ - [ ("emph", extractSpaces emph <$> tok) - , ("textit", extractSpaces emph <$> tok) - , ("textsl", extractSpaces emph <$> tok) - , ("textsc", extractSpaces smallcaps <$> tok) - , ("sout", extractSpaces strikeout <$> tok) - , ("textsuperscript", extractSpaces superscript <$> tok) - , ("textsubscript", extractSpaces subscript <$> tok) - , ("textbackslash", lit "\\") - , ("backslash", lit "\\") - , ("slash", lit "/") - , ("textbf", extractSpaces strong <$> tok) - , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) - , ("ldots", lit "…") - , ("dots", lit "…") - , ("mdots", lit "…") - , ("sim", lit "~") - , ("label", unlessParseRaw >> (inBrackets <$> tok)) - , ("ref", unlessParseRaw >> (inBrackets <$> tok)) - , ("noindent", unlessParseRaw >> ignore "noindent") - , ("textgreek", tok) - , ("sep", lit ",") - , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty - , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) - , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline braced) - , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) - , ("P", lit "¶") - , ("S", lit "§") - , ("$", lit "$") - , ("%", lit "%") - , ("&", lit "&") - , ("#", lit "#") - , ("_", lit "_") - , ("{", lit "{") - , ("}", lit "}") - -- old TeX commands - , ("em", extractSpaces emph <$> inlines) - , ("it", extractSpaces emph <$> inlines) - , ("sl", extractSpaces emph <$> inlines) - , ("bf", extractSpaces strong <$> inlines) - , ("rm", inlines) - , ("itshape", extractSpaces emph <$> inlines) - , ("slshape", extractSpaces emph <$> inlines) - , ("scshape", extractSpaces smallcaps <$> inlines) - , ("bfseries", extractSpaces strong <$> inlines) - , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", try $ tok >>= accent hungarumlaut) - , ("`", option (str "`") $ try $ tok >>= accent grave) - , ("'", option (str "'") $ try $ tok >>= accent acute) - , ("^", option (str "^") $ try $ tok >>= accent circ) - , ("~", option (str "~") $ try $ tok >>= accent tilde) - , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) - , (".", option (str ".") $ try $ tok >>= accent dot) - , ("=", option (str "=") $ try $ tok >>= accent macron) - , ("c", option (str "c") $ try $ tok >>= accent cedilla) - , ("v", option (str "v") $ try $ tok >>= accent hacek) - , ("u", option (str "u") $ try $ tok >>= accent breve) - , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) - , (",", pure mempty) - , ("@", pure mempty) - , (" ", lit "\160") - , ("ps", pure $ str "PS." <> space) - , ("TeX", lit "TeX") - , ("LaTeX", lit "LaTeX") - , ("bar", lit "|") - , ("textless", lit "<") - , ("textgreater", lit ">") - , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) - , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) - , ("verb", doverb) - , ("lstinline", skipopts *> doverb) - , ("Verb", doverb) - , ("texttt", (code . stringify . toList) <$> tok) - , ("url", (unescapeURL <$> braced) >>= \url -> - pure (link url "" (str url))) - , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> - tok >>= \lab -> - pure (link url "" lab)) - , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . removeDoubleQuotes <$> braced - mkImage options src) - , ("enquote", enquote) - , ("cite", citation "cite" NormalCitation False) - , ("Cite", citation "Cite" NormalCitation False) - , ("citep", citation "citep" NormalCitation False) - , ("citep*", citation "citep*" NormalCitation False) - , ("citeal", citation "citeal" NormalCitation False) - , ("citealp", citation "citealp" NormalCitation False) - , ("citealp*", citation "citealp*" NormalCitation False) - , ("autocite", citation "autocite" NormalCitation False) - , ("smartcite", citation "smartcite" NormalCitation False) - , ("footcite", inNote <$> citation "footcite" NormalCitation False) - , ("parencite", citation "parencite" NormalCitation False) - , ("supercite", citation "supercite" NormalCitation False) - , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) - , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) - , ("citeyear", citation "citeyear" SuppressAuthor False) - , ("autocite*", citation "autocite*" SuppressAuthor False) - , ("cite*", citation "cite*" SuppressAuthor False) - , ("parencite*", citation "parencite*" SuppressAuthor False) - , ("textcite", citation "textcite" AuthorInText False) - , ("citet", citation "citet" AuthorInText False) - , ("citet*", citation "citet*" AuthorInText False) - , ("citealt", citation "citealt" AuthorInText False) - , ("citealt*", citation "citealt*" AuthorInText False) - , ("textcites", citation "textcites" AuthorInText True) - , ("cites", citation "cites" NormalCitation True) - , ("autocites", citation "autocites" NormalCitation True) - , ("footcites", inNote <$> citation "footcites" NormalCitation True) - , ("parencites", citation "parencites" NormalCitation True) - , ("supercites", citation "supercites" NormalCitation True) - , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) - , ("Autocite", citation "Autocite" NormalCitation False) - , ("Smartcite", citation "Smartcite" NormalCitation False) - , ("Footcite", citation "Footcite" NormalCitation False) - , ("Parencite", citation "Parencite" NormalCitation False) - , ("Supercite", citation "Supercite" NormalCitation False) - , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) - , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) - , ("Citeyear", citation "Citeyear" SuppressAuthor False) - , ("Autocite*", citation "Autocite*" SuppressAuthor False) - , ("Cite*", citation "Cite*" SuppressAuthor False) - , ("Parencite*", citation "Parencite*" SuppressAuthor False) - , ("Textcite", citation "Textcite" AuthorInText False) - , ("Textcites", citation "Textcites" AuthorInText True) - , ("Cites", citation "Cites" NormalCitation True) - , ("Autocites", citation "Autocites" NormalCitation True) - , ("Footcites", citation "Footcites" NormalCitation True) - , ("Parencites", citation "Parencites" NormalCitation True) - , ("Supercites", citation "Supercites" NormalCitation True) - , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) - , ("citetext", complexNatbibCitation NormalCitation) - , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> - complexNatbibCitation AuthorInText) - <|> citation "citeauthor" AuthorInText False) - , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= - addMeta "nocite")) - ] ++ map ignoreInlines - -- these commands will be ignored unless --parse-raw is specified, - -- in which case they will appear as raw latex blocks: - [ "index" ] - -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do - let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") - _ -> (k, v) - let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options - let attr = ("",[], kvs) - let alt = str "image" - case takeExtension src of - "" -> do - defaultExt <- getOption readerDefaultImageExtension - return $ imageWith attr (addExtension src defaultExt) "" alt - _ -> return $ imageWith attr src "" alt - -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." - -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" - -enquote :: PandocMonad m => LP m Inlines -enquote = do - skipopts - context <- stateQuoteContext <$> getState - if context == InDoubleQuote - then singleQuoted <$> withQuoteContext InSingleQuote tok - else doubleQuoted <$> withQuoteContext InDoubleQuote tok - -doverb :: PandocMonad m => LP m Inlines -doverb = do - marker <- anyChar - code <$> manyTill (satisfy (/='\n')) (char marker) - -doLHSverb :: PandocMonad m => LP m Inlines -doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') - -lit :: String -> LP m Inlines -lit = pure . str - -accent :: (Char -> String) -> Inlines -> LP m Inlines -accent f ils = - case toList ils of - (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) - [] -> mzero - _ -> return ils - -grave :: Char -> String -grave 'A' = "À" -grave 'E' = "È" -grave 'I' = "Ì" -grave 'O' = "Ò" -grave 'U' = "Ù" -grave 'a' = "à" -grave 'e' = "è" -grave 'i' = "ì" -grave 'o' = "ò" -grave 'u' = "ù" -grave c = [c] - -acute :: Char -> String -acute 'A' = "Á" -acute 'E' = "É" -acute 'I' = "Í" -acute 'O' = "Ó" -acute 'U' = "Ú" -acute 'Y' = "Ý" -acute 'a' = "á" -acute 'e' = "é" -acute 'i' = "í" -acute 'o' = "ó" -acute 'u' = "ú" -acute 'y' = "ý" -acute 'C' = "Ć" -acute 'c' = "ć" -acute 'L' = "Ĺ" -acute 'l' = "ĺ" -acute 'N' = "Ń" -acute 'n' = "ń" -acute 'R' = "Ŕ" -acute 'r' = "ŕ" -acute 'S' = "Ś" -acute 's' = "ś" -acute 'Z' = "Ź" -acute 'z' = "ź" -acute c = [c] - -circ :: Char -> String -circ 'A' = "Â" -circ 'E' = "Ê" -circ 'I' = "Î" -circ 'O' = "Ô" -circ 'U' = "Û" -circ 'a' = "â" -circ 'e' = "ê" -circ 'i' = "î" -circ 'o' = "ô" -circ 'u' = "û" -circ 'C' = "Ĉ" -circ 'c' = "ĉ" -circ 'G' = "Ĝ" -circ 'g' = "ĝ" -circ 'H' = "Ĥ" -circ 'h' = "ĥ" -circ 'J' = "Ĵ" -circ 'j' = "ĵ" -circ 'S' = "Ŝ" -circ 's' = "ŝ" -circ 'W' = "Ŵ" -circ 'w' = "ŵ" -circ 'Y' = "Ŷ" -circ 'y' = "ŷ" -circ c = [c] - -tilde :: Char -> String -tilde 'A' = "Ã" -tilde 'a' = "ã" -tilde 'O' = "Õ" -tilde 'o' = "õ" -tilde 'I' = "Ĩ" -tilde 'i' = "ĩ" -tilde 'U' = "Ũ" -tilde 'u' = "ũ" -tilde 'N' = "Ñ" -tilde 'n' = "ñ" -tilde c = [c] - -umlaut :: Char -> String -umlaut 'A' = "Ä" -umlaut 'E' = "Ë" -umlaut 'I' = "Ï" -umlaut 'O' = "Ö" -umlaut 'U' = "Ü" -umlaut 'a' = "ä" -umlaut 'e' = "ë" -umlaut 'i' = "ï" -umlaut 'o' = "ö" -umlaut 'u' = "ü" -umlaut c = [c] - -hungarumlaut :: Char -> String -hungarumlaut 'A' = "A̋" -hungarumlaut 'E' = "E̋" -hungarumlaut 'I' = "I̋" -hungarumlaut 'O' = "Ő" -hungarumlaut 'U' = "Ű" -hungarumlaut 'Y' = "ӳ" -hungarumlaut 'a' = "a̋" -hungarumlaut 'e' = "e̋" -hungarumlaut 'i' = "i̋" -hungarumlaut 'o' = "ő" -hungarumlaut 'u' = "ű" -hungarumlaut 'y' = "ӳ" -hungarumlaut c = [c] - -dot :: Char -> String -dot 'C' = "Ċ" -dot 'c' = "ċ" -dot 'E' = "Ė" -dot 'e' = "ė" -dot 'G' = "Ġ" -dot 'g' = "ġ" -dot 'I' = "İ" -dot 'Z' = "Ż" -dot 'z' = "ż" -dot c = [c] - -macron :: Char -> String -macron 'A' = "Ā" -macron 'E' = "Ē" -macron 'I' = "Ī" -macron 'O' = "Ō" -macron 'U' = "Ū" -macron 'a' = "ā" -macron 'e' = "ē" -macron 'i' = "ī" -macron 'o' = "ō" -macron 'u' = "ū" -macron c = [c] - -cedilla :: Char -> String -cedilla 'c' = "ç" -cedilla 'C' = "Ç" -cedilla 's' = "ş" -cedilla 'S' = "Ş" -cedilla 't' = "ţ" -cedilla 'T' = "Ţ" -cedilla 'e' = "ȩ" -cedilla 'E' = "Ȩ" -cedilla 'h' = "ḩ" -cedilla 'H' = "Ḩ" -cedilla 'o' = "o̧" -cedilla 'O' = "O̧" -cedilla c = [c] - -hacek :: Char -> String -hacek 'A' = "Ǎ" -hacek 'a' = "ǎ" -hacek 'C' = "Č" -hacek 'c' = "č" -hacek 'D' = "Ď" -hacek 'd' = "ď" -hacek 'E' = "Ě" -hacek 'e' = "ě" -hacek 'G' = "Ǧ" -hacek 'g' = "ǧ" -hacek 'H' = "Ȟ" -hacek 'h' = "ȟ" -hacek 'I' = "Ǐ" -hacek 'i' = "ǐ" -hacek 'j' = "ǰ" -hacek 'K' = "Ǩ" -hacek 'k' = "ǩ" -hacek 'L' = "Ľ" -hacek 'l' = "ľ" -hacek 'N' = "Ň" -hacek 'n' = "ň" -hacek 'O' = "Ǒ" -hacek 'o' = "ǒ" -hacek 'R' = "Ř" -hacek 'r' = "ř" -hacek 'S' = "Š" -hacek 's' = "š" -hacek 'T' = "Ť" -hacek 't' = "ť" -hacek 'U' = "Ǔ" -hacek 'u' = "ǔ" -hacek 'Z' = "Ž" -hacek 'z' = "ž" -hacek c = [c] - -breve :: Char -> String -breve 'A' = "Ă" -breve 'a' = "ă" -breve 'E' = "Ĕ" -breve 'e' = "ĕ" -breve 'G' = "Ğ" -breve 'g' = "ğ" -breve 'I' = "Ĭ" -breve 'i' = "ĭ" -breve 'O' = "Ŏ" -breve 'o' = "ŏ" -breve 'U' = "Ŭ" -breve 'u' = "ŭ" -breve c = [c] - -tok :: PandocMonad m => LP m Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar - -opt :: PandocMonad m => LP m Inlines -opt = bracketed inline - -rawopt :: PandocMonad m => LP m String -rawopt = do - contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|> - try (string "\\[") <|> rawopt) - optional sp - return $ "[" ++ contents ++ "]" - -skipopts :: PandocMonad m => LP m () -skipopts = skipMany rawopt - --- opts in angle brackets are used in beamer -rawangle :: PandocMonad m => LP m () -rawangle = try $ do - char '<' - skipMany (noneOf ">") - char '>' - return () - -skipangles :: PandocMonad m => LP m () -skipangles = skipMany rawangle - -inlineText :: PandocMonad m => LP m Inlines -inlineText = str <$> many1 inlineChar - -inlineChar :: PandocMonad m => LP m Char -inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n" - -environment :: PandocMonad m => LP m Blocks -environment = do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name environments - <|> rawEnv name - -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- braced - M.findWithDefault mzero name inlineEnvironments - -rawEnv :: PandocMonad m => String -> LP m Blocks -rawEnv name = do - exts <- getOption readerExtensions - let parseRaw = extensionEnabled Ext_raw_tex exts - rawOptions <- mconcat <$> many rawopt - let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions - unless parseRaw $ do - pos1 <- getPosition - report $ SkippedContent beginCommand pos1 - (bs, raw) <- withRaw $ env name blocks - raw' <- applyMacros' raw - if parseRaw - then return $ rawBlock "latex" $ beginCommand ++ raw' - else do - pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ name ++ "}") pos2 - return bs - ----- - -braced' :: PandocMonad m => LP m String -braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') - -maybeAddExtension :: String -> FilePath -> FilePath -maybeAddExtension ext fp = - if null (takeExtension fp) - then addExtension fp ext - else fp - -include :: PandocMonad m => LP m Blocks -include = do - fs' <- try $ do - char '\\' - name <- try (string "include") - <|> try (string "input") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> manyTill anyChar (char ']') - fs <- (map trim . splitBy (==',')) <$> braced' - return $ if name == "usepackage" - then map (maybeAddExtension ".sty") fs - else map (maybeAddExtension ".tex") fs - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mconcat <$> mapM (insertIncludedFile blocks dirs) fs' - -inputListing :: PandocMonad m => LP m Blocks -inputListing = do - pos <- getPosition - options <- option [] keyvals - f <- filter (/='"') <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f - codeLines <- case mbCode of - Just s -> return $ lines s - Nothing -> do - report $ CouldNotLoadIncludeFile f pos - return [] - let (ident,classes,kvs) = parseListingsOptions options - let language = case lookup "language" options >>= fromListingsLanguage of - Just l -> [l] - Nothing -> take 1 $ languagesByExtension (takeExtension f) - let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead - let lastline = fromMaybe (length codeLines) $ - lookup "lastline" options >>= safeRead - let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ - drop (firstline - 1) codeLines - return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents - -parseListingsOptions :: [(String, String)] -> Attr -parseListingsOptions options = - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - ++ maybeToList (lookup "language" options - >>= fromListingsLanguage) - in (fromMaybe "" (lookup "label" options), classes, kvs) - ----- - -keyval :: PandocMonad m => LP m (String, String) -keyval = try $ do - key <- many1 alphaNum - val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') - skipMany spaceChar - optional (char ',') - skipMany spaceChar - return (key, val) - - -keyvals :: PandocMonad m => LP m [(String, String)] -keyvals = try $ char '[' *> manyTill keyval (char ']') - -alltt :: PandocMonad m => String -> LP m Blocks -alltt t = walk strToCode <$> parseFromString blocks - (substitute " " "\\ " $ substitute "%" "\\%" $ - intercalate "\\\\\n" $ lines t) - where strToCode (Str s) = Code nullAttr s - strToCode x = x - -rawLaTeXBlock :: PandocMonad m => LP m String -rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) - -rawLaTeXInline :: PandocMonad m => LP m Inline -rawLaTeXInline = do - raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) - RawInline "latex" <$> applyMacros' raw - -addImageCaption :: PandocMonad m => Blocks -> LP m Blocks -addImageCaption = walkM go - where go (Image attr alt (src,tit)) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Image attr (toList ils) (src, "fig:") - Nothing -> Image attr alt (src,tit) - go x = return x - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table c als ws hs rs) = do - mbcapt <- stateCaption <$> getState - return $ case mbcapt of - Just ils -> Table (toList ils) als ws hs rs - Nothing -> Table c als ws hs rs - go x = return x - -environments :: PandocMonad m => M.Map String (LP m Blocks) -environments = M.fromList - [ ("document", env "document" blocks <* skipMany anyChar) - , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) - , ("letter", env "letter" letterContents) - , ("minipage", env "minipage" $ - skipopts *> spaces' *> optional braced *> spaces' *> blocks) - , ("figure", env "figure" $ - resetCaption *> skipopts *> blocks >>= addImageCaption) - , ("center", env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable False >>= addTableCaption) - , ("table", env "table" $ - resetCaption *> skipopts *> blocks >>= addTableCaption) - , ("tabular*", env "tabular" $ simpTable True) - , ("tabular", env "tabular" $ simpTable False) - , ("quote", blockQuote <$> env "quote" blocks) - , ("quotation", blockQuote <$> env "quotation" blocks) - , ("verse", blockQuote <$> env "verse" blocks) - , ("itemize", bulletList <$> listenv "itemize" (many item)) - , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", orderedList') - , ("alltt", alltt =<< verbEnv "alltt") - , ("code", guardEnabled Ext_literate_haskell *> - (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - verbEnv "code")) - , ("comment", mempty <$ verbEnv "comment") - , ("verbatim", codeBlock <$> verbEnv "verbatim") - , ("Verbatim", fancyverbEnv "Verbatim") - , ("BVerbatim", fancyverbEnv "BVerbatim") - , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals - codeBlockWith attr <$> verbEnv "lstlisting") - , ("minted", do options <- option [] keyvals - lang <- grouped (many1 $ satisfy (/='}')) - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ - [ "numberLines" | - lookup "linenos" options == Just "true" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv "minted") - , ("obeylines", parseFromString - (para . trimInlines . mconcat <$> many inline) =<< - intercalate "\\\\\n" . lines <$> verbEnv "obeylines") - , ("displaymath", mathEnv para Nothing "displaymath") - , ("equation", mathEnv para Nothing "equation") - , ("equation*", mathEnv para Nothing "equation*") - , ("gather", mathEnv para (Just "gathered") "gather") - , ("gather*", mathEnv para (Just "gathered") "gather*") - , ("multline", mathEnv para (Just "gathered") "multline") - , ("multline*", mathEnv para (Just "gathered") "multline*") - , ("eqnarray", mathEnv para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*") - , ("align", mathEnv para (Just "aligned") "align") - , ("align*", mathEnv para (Just "aligned") "align*") - , ("alignat", mathEnv para (Just "aligned") "alignat") - , ("alignat*", mathEnv para (Just "aligned") "alignat*") - ] - -letterContents :: PandocMonad m => LP m Blocks -letterContents = do - bs <- blocks - st <- getState - -- add signature (author) and address (title) - let addr = case lookupMeta "address" (stateMeta st) of - Just (MetaBlocks [Plain xs]) -> - para $ trimInlines $ fromList xs - _ -> mempty - return $ addr <> bs -- sig added by \closing - -closing :: PandocMonad m => LP m Blocks -closing = do - contents <- tok - st <- getState - let extractInlines (MetaBlocks [Plain ys]) = ys - extractInlines (MetaBlocks [Para ys ]) = ys - extractInlines _ = [] - let sigs = case lookupMeta "author" (stateMeta st) of - Just (MetaList xs) -> - para $ trimInlines $ fromList $ - intercalate [LineBreak] $ map extractInlines xs - _ -> mempty - return $ para (trimInlines contents) <> sigs - -item :: PandocMonad m => LP m Blocks -item = blocks *> controlSeq "item" *> skipopts *> blocks - -looseItem :: PandocMonad m => LP m Blocks -looseItem = do - ctx <- stateParserContext `fmap` getState - if ctx == ListItemState - then mzero - else return mempty - -descItem :: PandocMonad m => LP m (Inlines, [Blocks]) -descItem = do - blocks -- skip blocks before item - controlSeq "item" - optional sp - ils <- opt - bs <- blocks - return (ils, [bs]) - -env :: PandocMonad m => String -> LP m a -> LP m a -env name p = p <* - (try (controlSeq "end" *> braced >>= guard . (== name)) - <?> ("\\end{" ++ name ++ "}")) - -listenv :: PandocMonad m => String -> LP m a -> LP m a -listenv name p = try $ do - oldCtx <- stateParserContext `fmap` getState - updateState $ \st -> st{ stateParserContext = ListItemState } - res <- env name p - updateState $ \st -> st{ stateParserContext = oldCtx } - return res - -mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a -mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name) - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ - "\\end{" ++ y ++ "}" - -verbEnv :: PandocMonad m => String -> LP m String -verbEnv name = do - skipopts - optional blankline - let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) - res <- manyTill anyChar endEnv - return $ stripTrailingNewlines res - -fancyverbEnv :: PandocMonad m => String -> LP m Blocks -fancyverbEnv name = do - options <- option [] keyvals - let kvs = [ (if k == "firstnumber" - then "startFrom" - else k, v) | (k,v) <- options ] - let classes = [ "numberLines" | - lookup "numbers" options == Just "left" ] - let attr = ("",classes,kvs) - codeBlockWith attr <$> verbEnv name - -orderedList' :: PandocMonad m => LP m Blocks -orderedList' = do - optional sp - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ char '[' *> anyOrderedListMarker <* char ']' - spaces - optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced - spaces - start <- option 1 $ try $ do controlSeq "setcounter" - grouped (string "enum" *> many1 (oneOf "iv")) - optional sp - num <- grouped (many1 digit) - spaces - return (read num + 1 :: Int) - bs <- listenv "enumerate" (many item) - return $ orderedListWith (start, style, delim) bs - -paragraph :: PandocMonad m => LP m Blocks -paragraph = do - x <- trimInlines . mconcat <$> many1 inline - if x == mempty - then return mempty - else return $ para x - -preamble :: PandocMonad m => LP m Blocks -preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = void comment - <|> void sp - <|> void blanklines - <|> void include - <|> void macro - <|> void blockCommand - <|> void anyControlSeq - <|> void braced - <|> void anyChar - -------- - --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - char '{' - optional sp - keys <- manyTill citationLabel (char '}') - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m String -citationLabel = optional sp *> - (many1 (satisfy isBibtexKeyChar) - <* optional sp - <* optional (char ',') - <* optional sp) - where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String) - -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then many1 simpleCiteArgs - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs - -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) - -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - let ils = (toList . trimInlines . mconcat) <$> - many (notFollowedBy (oneOf "\\};") >> inline) - let parseOne = try $ do - skipSpaces - pref <- ils - cit' <- inline -- expect a citation - let citlist = toList cit' - cits' <- case citlist of - [Cite cs _] -> return cs - _ -> mzero - suff <- ils - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff cits' - (c:cits, raw) <- withRaw $ grouped parseOne - return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ raw) - --- tables - -parseAligns :: PandocMonad m => LP m [(String, Alignment, String)] -parseAligns = try $ do - char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) - maybeBar - let cAlign = AlignCenter <$ char 'c' - let lAlign = AlignLeft <$ char 'l' - let rAlign = AlignRight <$ char 'r' - let parAlign = AlignLeft <$ (char 'p' >> braced) - let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign - let alignPrefix = char '>' >> braced - let alignSuffix = char '<' >> braced - let alignSpec = do - spaces - pref <- option "" alignPrefix - spaces - ch <- alignChar - spaces - suff <- option "" alignSuffix - return (pref, ch, suff) - aligns' <- sepEndBy alignSpec maybeBar - spaces - char '}' - spaces - return $ aligns' - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces' - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces' - optional $ bracketed (many1 (satisfy (/=']'))) - return () - -lbreak :: PandocMonad m => LP m () -lbreak = () <$ try (spaces' *> - (controlSeq "\\" <|> controlSeq "tabularnewline") <* - spaces') - -amp :: PandocMonad m => LP m () -amp = () <$ try (spaces' *> char '&' <* spaces') - -parseTableRow :: PandocMonad m - => Int -- ^ number of columns - -> [String] -- ^ prefixes - -> [String] -- ^ suffixes - -> LP m [Blocks] -parseTableRow cols prefixes suffixes = try $ do - let tableCellRaw = many (notFollowedBy - (amp <|> lbreak <|> - (() <$ try (string "\\end"))) >> anyChar) - let minipage = try $ controlSeq "begin" *> string "{minipage}" *> - env "minipage" - (skipopts *> spaces' *> optional braced *> spaces' *> blocks) - let tableCell = minipage <|> - ((plain . trimInlines . mconcat) <$> many inline) - rawcells <- sepBy1 tableCellRaw amp - guard $ length rawcells == cols - let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s) - rawcells prefixes suffixes - cells' <- mapM (parseFromString tableCell) rawcells' - let numcells = length cells' - guard $ numcells <= cols && numcells >= 1 - guard $ cells' /= [mempty] - -- note: a & b in a three-column table leaves an empty 3rd cell: - let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces' - return cells'' - -spaces' :: PandocMonad m => LP m () -spaces' = spaces *> skipMany (comment *> spaces) - -simpTable :: PandocMonad m => Bool -> LP m Blocks -simpTable hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces' >> tok) - skipopts - (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns - let cols = length aligns - optional $ controlSeq "caption" *> skipopts *> setCaption - optional lbreak - spaces' - skipMany hline - spaces' - header' <- option [] $ try (parseTableRow cols prefixes suffixes <* - lbreak <* many1 hline) - spaces' - rows <- sepEndBy (parseTableRow cols prefixes suffixes) - (lbreak <* optional (skipMany hline)) - spaces' - optional $ controlSeq "caption" *> skipopts *> setCaption - optional lbreak - spaces' - let header'' = if null header' - then replicate cols mempty - else header' - lookAhead $ controlSeq "end" -- make sure we're at end - return $ table mempty (zip aligns (repeat 0)) header'' rows - -removeDoubleQuotes :: String -> String -removeDoubleQuotes ('"':xs) = - case reverse xs of - '"':ys -> reverse ys - _ -> '"':xs -removeDoubleQuotes xs = xs diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs deleted file mode 100644 index 80a1cd7a2..000000000 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ /dev/null @@ -1,2119 +0,0 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 -{-# LANGUAGE ScopedTypeVariables #-} - -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of markdown-formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Markdown ( readMarkdown ) where - -import Data.List ( transpose, sortBy, findIndex, intercalate ) -import qualified Data.Map as M -import Data.Scientific (coefficient, base10Exponent) -import Data.Ord ( comparing ) -import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation ) -import Data.Maybe -import Text.Pandoc.Definition -import Text.Pandoc.Emoji (emojis) -import Text.Pandoc.Generic (bottomUp) -import qualified Data.Text as T -import Data.Text (Text) -import qualified Data.Yaml as Yaml -import Data.Yaml (ParseException(..), YamlException(..), YamlMark(..)) -import qualified Data.HashMap.Strict as H -import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Shared -import Text.Pandoc.Pretty (charWidth) -import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Parsing hiding (tableWith) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, - isTextTag, isCommentTag ) -import Control.Monad -import System.FilePath (takeExtension, addExtension) -import Text.HTML.TagSoup -import Data.Monoid ((<>)) -import Control.Monad.Trans (lift) -import Control.Monad.Except (throwError, catchError) -import Text.Pandoc.Class (PandocMonad, report) - -type MarkdownParser m = ParserT [Char] ParserState m - --- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readMarkdown opts s = do - parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - case parsed of - Right result -> return result - Left e -> throwError e - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - --- --- Constants and data structure definitions --- - -isBulletListMarker :: Char -> Bool -isBulletListMarker '*' = True -isBulletListMarker '+' = True -isBulletListMarker '-' = True -isBulletListMarker _ = False - -isHruleChar :: Char -> Bool -isHruleChar '*' = True -isHruleChar '-' = True -isHruleChar '_' = True -isHruleChar _ = False - -setextHChars :: String -setextHChars = "=-" - -isBlank :: Char -> Bool -isBlank ' ' = True -isBlank '\t' = True -isBlank '\n' = True -isBlank _ = False - --- --- auxiliary functions --- - --- | Succeeds when we're in list context. -inList :: PandocMonad m => MarkdownParser m () -inList = do - ctx <- stateParserContext <$> getState - guard (ctx == ListItemState) - -spnl :: PandocMonad m => ParserT [Char] st m () -spnl = try $ do - skipSpaces - optional newline - skipSpaces - notFollowedBy (char '\n') - -indentSpaces :: PandocMonad m => MarkdownParser m String -indentSpaces = try $ do - tabStop <- getOption readerTabStop - count tabStop (char ' ') <|> - string "\t" <?> "indentation" - -nonindentSpaces :: PandocMonad m => MarkdownParser m String -nonindentSpaces = do - tabStop <- getOption readerTabStop - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" - --- returns number of spaces parsed -skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int -skipNonindentSpaces = do - tabStop <- getOption readerTabStop - atMostSpaces (tabStop - 1) <* notFollowedBy (char ' ') - -atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int -atMostSpaces n - | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 - | otherwise = return 0 - -litChar :: PandocMonad m => MarkdownParser m Char -litChar = escapedChar' - <|> characterReference - <|> noneOf "\n" - <|> try (newline >> notFollowedBy blankline >> return ' ') - --- | Parse a sequence of inline elements between square brackets, --- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) -inlinesInBalancedBrackets = do - char '[' - (_, raw) <- withRaw $ charsInBalancedBrackets 1 - guard $ not $ null raw - parseFromString (trimInlinesF . mconcat <$> many inline) (init raw) - -charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m () -charsInBalancedBrackets 0 = return () -charsInBalancedBrackets openBrackets = - (char '[' >> charsInBalancedBrackets (openBrackets + 1)) - <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) - <|> (( (() <$ code) - <|> (() <$ (escapedChar')) - <|> (newline >> notFollowedBy blankline) - <|> skipMany1 (noneOf "[]`\n\\") - <|> (() <$ count 1 (oneOf "`\\")) - ) >> charsInBalancedBrackets openBrackets) - --- --- document structure --- - -rawTitleBlockLine :: PandocMonad m => MarkdownParser m String -rawTitleBlockLine = do - char '%' - skipSpaces - first <- anyLine - rest <- many $ try $ do spaceChar - notFollowedBy blankline - skipSpaces - anyLine - return $ trim $ unlines (first:rest) - -titleLine :: PandocMonad m => MarkdownParser m (F Inlines) -titleLine = try $ do - raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res - -authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines]) -authorsLine = try $ do - raw <- rawTitleBlockLine - let sep = (char ';' <* spaces) <|> newline - let pAuthors = sepEndBy - (trimInlinesF . mconcat <$> many - (try $ notFollowedBy sep >> inline)) - sep - sequence <$> parseFromString pAuthors raw - -dateLine :: PandocMonad m => MarkdownParser m (F Inlines) -dateLine = try $ do - raw <- rawTitleBlockLine - res <- parseFromString (many inline) raw - return $ trimInlinesF $ mconcat res - -titleBlock :: PandocMonad m => MarkdownParser m () -titleBlock = pandocTitleBlock <|> mmdTitleBlock - -pandocTitleBlock :: PandocMonad m => MarkdownParser m () -pandocTitleBlock = try $ do - guardEnabled Ext_pandoc_title_block - lookAhead (char '%') - title <- option mempty titleLine - author <- option (return []) authorsLine - date <- option mempty dateLine - optional blanklines - let meta' = do title' <- title - author' <- author - date' <- date - return $ - (if B.isNull title' then id else B.setMeta "title" title') - . (if null author' then id else B.setMeta "author" author') - . (if B.isNull date' then id else B.setMeta "date" date') - $ nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } - - --- Adapted from solution at --- http://stackoverflow.com/a/29448764/1901888 -foldrWithKeyM :: Monad m => (k -> b -> a -> m a) -> a -> H.HashMap k b -> m a -foldrWithKeyM f acc = H.foldrWithKey f' (return acc) - where - f' k b ma = ma >>= \a -> f k b a - -yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) -yamlMetaBlock = try $ do - guardEnabled Ext_yaml_metadata_block - pos <- getPosition - string "---" - blankline - notFollowedBy blankline -- if --- is followed by a blank it's an HRULE - rawYamlLines <- manyTill anyLine stopLine - -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) - optional blanklines - opts <- stateOptions <$> getState - meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> - foldrWithKeyM - (\k v m -> do - if ignorable k - then return m - else (do v' <- lift $ yamlToMeta opts v - return $ B.setMeta (T.unpack k) v' m) - `catchError` - (\_ -> return m) - ) nullMeta hashmap - Right Yaml.Null -> return nullMeta - Right _ -> do - logMessage $ - CouldNotParseYamlMetadata "not an object" - pos - return nullMeta - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - logMessage $ CouldNotParseYamlMetadata - problem (setSourceLine - (setSourceColumn pos - (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - _ -> logMessage $ CouldNotParseYamlMetadata - (show err') pos - return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> (return meta') } - return mempty - --- ignore fields ending with _ -ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t - -toMetaValue :: PandocMonad m => ReaderOptions -> Text -> m MetaValue -toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) - where - toMeta p = - case p of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] - | endsWithNewline x -> MetaBlocks [Para xs] - | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions = - disableExtension Ext_pandoc_title_block $ - disableExtension Ext_mmd_title_block $ - disableExtension Ext_yaml_metadata_block $ - readerExtensions opts } - -yamlToMeta :: PandocMonad m => ReaderOptions -> Yaml.Value -> m MetaValue -yamlToMeta opts (Yaml.String t) = toMetaValue opts t -yamlToMeta _ (Yaml.Number n) - -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = return $ MetaString $ show - $ coefficient n * (10 ^ base10Exponent n) - | otherwise = return $ MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) - (V.toList xs) -yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> - if ignorable k - then m - else (do - v' <- yamlToMeta opts v - m' <- m - return (M.insert (T.unpack k) v' m'))) - (return M.empty) o -yamlToMeta _ _ = return $ MetaString "" - -stopLine :: PandocMonad m => MarkdownParser m () -stopLine = try $ (string "---" <|> string "...") >> blankline >> return () - -mmdTitleBlock :: PandocMonad m => MarkdownParser m () -mmdTitleBlock = try $ do - guardEnabled Ext_mmd_title_block - firstPair <- kvPair False - restPairs <- many (kvPair True) - let kvPairs = firstPair : restPairs - blanklines - updateState $ \st -> st{ stateMeta' = stateMeta' st <> - return (Meta $ M.fromList kvPairs) } - -kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) -kvPair allowEmpty = try $ do - key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - val <- trim <$> manyTill anyChar - (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ allowEmpty || not (null val) - let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val - return (key',val') - -parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc -parseMarkdown = do - optional titleBlock - blocks <- parseBlocks - st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st - eastAsianLineBreaks <- option False $ - True <$ guardEnabled Ext_east_asian_line_breaks - reportLogMessages - return $ (if eastAsianLineBreaks - then bottomUp softBreakFilter - else id) $ Pandoc meta bs - -softBreakFilter :: [Inline] -> [Inline] -softBreakFilter (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), (c:_)) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs -softBreakFilter xs = xs - -referenceKey :: PandocMonad m => MarkdownParser m (F Blocks) -referenceKey = try $ do - pos <- getPosition - skipNonindentSpaces - (_,raw) <- reference - char ':' - skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = liftM unwords $ many $ try $ do - skipMany spaceChar - notFollowedBy' referenceTitle - notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes - notFollowedBy' (() <$ reference) - many1 $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTill litChar (char '>') - src <- try betweenAngles <|> sourceURL - tit <- option "" referenceTitle - attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> skipSpaces >> attributes - addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes - >> many (try $ spnl >> keyValAttr) - blanklines - let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs - target = (escapeURI $ trimr src, tit) - st <- getState - let oldkeys = stateKeys st - let key = toKey raw - case M.lookup key oldkeys of - Just _ -> logMessage $ DuplicateLinkReference raw pos - Nothing -> return () - updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } - return $ return mempty - -referenceTitle :: PandocMonad m => MarkdownParser m String -referenceTitle = try $ do - skipSpaces >> optional newline >> skipSpaces - quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar - --- A link title in quotes -quotedTitle :: PandocMonad m => Char -> MarkdownParser m String -quotedTitle c = try $ do - char c - notFollowedBy spaces - let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar - let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c - unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder - --- | PHP Markdown Extra style abbreviation key. Currently --- we just skip them, since Pandoc doesn't have an element for --- an abbreviation. -abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks) -abbrevKey = do - guardEnabled Ext_abbreviations - try $ do - char '*' - reference - char ':' - skipMany (satisfy (/= '\n')) - blanklines - return $ return mempty - -noteMarker :: PandocMonad m => MarkdownParser m String -noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') - -rawLine :: PandocMonad m => MarkdownParser m String -rawLine = try $ do - notFollowedBy blankline - notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker - optional indentSpaces - anyLine - -rawLines :: PandocMonad m => MarkdownParser m String -rawLines = do - first <- anyLine - rest <- many rawLine - return $ unlines (first:rest) - -noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) -noteBlock = try $ do - pos <- getPosition - skipNonindentSpaces - ref <- noteMarker - char ':' - optional blankline - optional indentSpaces - first <- rawLines - rest <- many $ try $ blanklines >> indentSpaces >> rawLines - let raw = unlines (first:rest) ++ "\n" - optional blanklines - parsed <- parseFromString parseBlocks raw - let newnote = (ref, parsed) - oldnotes <- stateNotes' <$> getState - case lookup ref oldnotes of - Just _ -> logMessage $ DuplicateNoteReference ref pos - Nothing -> return () - updateState $ \s -> s { stateNotes' = newnote : oldnotes } - return mempty - --- --- parsing blocks --- - -parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks) -parseBlocks = mconcat <$> manyTill block eof - -block :: PandocMonad m => MarkdownParser m (F Blocks) -block = do - pos <- getPosition - res <- choice [ mempty <$ blanklines - , codeBlockFenced - , yamlMetaBlock - -- note: bulletList needs to be before header because of - -- the possibility of empty list items: - - , bulletList - , header - , lhsCodeBlock - , divHtml - , htmlBlock - , table - , codeBlockIndented - , guardEnabled Ext_latex_macros *> (macro >>= return . return) - , rawTeXBlock - , lineBlock - , blockQuote - , hrule - , orderedList - , definitionList - , noteBlock - , referenceKey - , abbrevKey - , para - , plain - ] <?> "block" - report $ ParsingTrace - (take 60 $ show $ B.toList $ runF res defaultParserState) pos - return res - --- --- header blocks --- - -header :: PandocMonad m => MarkdownParser m (F Blocks) -header = setextHeader <|> atxHeader <?> "header" - -atxChar :: PandocMonad m => MarkdownParser m Char -atxChar = do - exts <- getOption readerExtensions - return $ if extensionEnabled Ext_literate_haskell exts - then '=' - else '#' - -atxHeader :: PandocMonad m => MarkdownParser m (F Blocks) -atxHeader = try $ do - level <- atxChar >>= many1 . char >>= return . length - notFollowedBy $ guardEnabled Ext_fancy_lists >> - (char '.' <|> char ')') -- this would be a list - skipSpaces - (text, raw) <- withRaw $ - trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) - attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) - guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw attr' - return $ B.headerWith attr' level <$> text - -atxClosing :: PandocMonad m => MarkdownParser m Attr -atxClosing = try $ do - attr' <- option nullAttr - (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) - skipMany . char =<< atxChar - skipSpaces - attr <- option attr' - (guardEnabled Ext_header_attributes >> attributes) - blanklines - return attr - -setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr -setextHeaderEnd = try $ do - attr <- option nullAttr - $ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) - <|> (guardEnabled Ext_header_attributes >> attributes) - blanklines - return attr - -mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr -mmdHeaderIdentifier = do - ident <- stripFirstAndLast . snd <$> reference - skipSpaces - return (ident,[],[]) - -setextHeader :: PandocMonad m => MarkdownParser m (F Blocks) -setextHeader = try $ do - -- This lookahead prevents us from wasting time parsing Inlines - -- unless necessary -- it gives a significant performance boost. - lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - skipSpaces - (text, raw) <- withRaw $ - trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) - attr <- setextHeaderEnd - underlineChar <- oneOf setextHChars - many (char underlineChar) - blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) - guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw attr' - return $ B.headerWith attr' level <$> text - -registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () -registerImplicitHeader raw attr@(ident, _, _) = do - let key = toKey $ "[" ++ raw ++ "]" - updateState (\s -> s { stateHeaderKeys = - M.insert key (('#':ident,""), attr) (stateHeaderKeys s) }) - --- --- hrule block --- - -hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) -hrule = try $ do - skipSpaces - start <- satisfy isHruleChar - count 2 (skipSpaces >> char start) - skipMany (spaceChar <|> char start) - newline - optional blanklines - return $ return B.horizontalRule - --- --- code blocks --- - -indentedLine :: PandocMonad m => MarkdownParser m String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") - -blockDelimiter :: PandocMonad m - => (Char -> Bool) - -> Maybe Int - -> ParserT [Char] st m Int -blockDelimiter f len = try $ do - c <- lookAhead (satisfy f) - case len of - Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length - -attributes :: PandocMonad m => MarkdownParser m Attr -attributes = try $ do - char '{' - spnl - attrs <- many (attribute <* spnl) - char '}' - return $ foldl (\x f -> f x) nullAttr attrs - -attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) -attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr - -identifier :: PandocMonad m => MarkdownParser m String -identifier = do - first <- letter - rest <- many $ alphaNum <|> oneOf "-_:." - return (first:rest) - -identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) -identifierAttr = try $ do - char '#' - result <- identifier - return $ \(_,cs,kvs) -> (result,cs,kvs) - -classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) -classAttr = try $ do - char '.' - result <- identifier - return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs) - -keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) -keyValAttr = try $ do - key <- identifier - char '=' - val <- enclosed (char '"') (char '"') litChar - <|> enclosed (char '\'') (char '\'') litChar - <|> many (escapedChar' <|> noneOf " \t\n\r}") - return $ \(id',cs,kvs) -> - case key of - "id" -> (val,cs,kvs) - "class" -> (id',cs ++ words val,kvs) - _ -> (id',cs,kvs ++ [(key,val)]) - -specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) -specialAttr = do - char '-' - return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) - -codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) -codeBlockFenced = try $ do - c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) - <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) - size <- blockDelimiter (== c) Nothing - skipMany spaceChar - attr <- option ([],[],[]) $ - try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) - blankline - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) - blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents - --- correctly handle github language identifiers -toLanguageId :: String -> String -toLanguageId = map toLower . go - where go "c++" = "cpp" - go "objective-c" = "objectivec" - go x = x - -codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks) -codeBlockIndented = do - contents <- many1 (indentedLine <|> - try (do b <- blanklines - l <- indentedLine - return $ b ++ l)) - optional blanklines - classes <- getOption readerIndentedCodeClasses - return $ return $ B.codeBlockWith ("", classes, []) $ - stripTrailingNewlines $ concat contents - -lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) -lhsCodeBlock = do - guardEnabled Ext_literate_haskell - (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> - (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) - <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> - lhsCodeBlockInverseBird) - -lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String -lhsCodeBlockLaTeX = try $ do - string "\\begin{code}" - manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") - blanklines - return $ stripTrailingNewlines contents - -lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String -lhsCodeBlockBird = lhsCodeBlockBirdWith '>' - -lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String -lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' - -lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String -lhsCodeBlockBirdWith c = try $ do - pos <- getPosition - when (sourceColumn pos /= 1) $ fail "Not in first column" - lns <- many1 $ birdTrackLine c - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns - blanklines - return $ intercalate "\n" lns' - -birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String -birdTrackLine c = try $ do - char c - -- allow html tags on left margin: - when (c == '<') $ notFollowedBy letter - anyLine - --- --- block quotes --- - -emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char -emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') - -emailBlockQuote :: PandocMonad m => MarkdownParser m [String] -emailBlockQuote = try $ do - emailBlockQuoteStart - let emailLine = many $ nonEndline <|> try - (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n') - let emailSep = try (newline >> emailBlockQuoteStart) - first <- emailLine - rest <- many $ try $ emailSep >> emailLine - let raw = first:rest - newline <|> (eof >> return '\n') - optional blanklines - return raw - -blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) -blockQuote = do - raw <- emailBlockQuote - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ B.blockQuote <$> contents - --- --- list blocks --- - -bulletListStart :: PandocMonad m => MarkdownParser m () -bulletListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - notFollowedBy' (() <$ hrule) -- because hrules start out just like lists - satisfy isBulletListMarker - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - () <$ atMostSpaces (tabStop - (endpos - startpos)) - -anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - notFollowedBy $ string "p." >> spaceChar >> digit -- page number - res <- do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead - char '.' - return (start, DefaultStyle, DefaultDelim) - <|> do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, - -- insist on more than one space - when (delim == Period && (style == UpperAlpha || - (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ - () <$ spaceChar - return (num, style, delim) - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res - -listStart :: PandocMonad m => MarkdownParser m () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) - -listLine :: PandocMonad m => MarkdownParser m String -listLine = try $ do - notFollowedBy' (do indentSpaces - many spaceChar - listStart) - notFollowedByHtmlCloser - optional (() <$ indentSpaces) - listLineCommon - -listLineCommon :: PandocMonad m => MarkdownParser m String -listLineCommon = concat <$> manyTill - ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') - <|> liftM snd (htmlTag isCommentTag) - <|> count 1 anyChar - ) newline - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: PandocMonad m - => MarkdownParser m a - -> MarkdownParser m String -rawListItem start = try $ do - start - first <- listLineCommon - rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine) - blanks <- many blankline - return $ unlines (first:rest) ++ blanks - --- continuation of a list item - indented and separated by blankline --- or (in compact lists) endline. --- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => MarkdownParser m String -listContinuation = try $ do - lookAhead indentSpaces - result <- many1 listContinuationLine - blanks <- many blankline - return $ concat result ++ blanks - -notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () -notFollowedByHtmlCloser = do - inHtmlBlock <- stateInHtmlBlock <$> getState - case inHtmlBlock of - Just t -> notFollowedBy' $ htmlTag (~== TagClose t) - Nothing -> return () - -listContinuationLine :: PandocMonad m => MarkdownParser m String -listContinuationLine = try $ do - notFollowedBy blankline - notFollowedBy' listStart - notFollowedByHtmlCloser - optional indentSpaces - result <- anyLine - return $ result ++ "\n" - -listItem :: PandocMonad m - => MarkdownParser m a - -> MarkdownParser m (F Blocks) -listItem start = try $ do - first <- rawListItem start - continuations <- many listContinuation - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw - updateState (\st -> st {stateParserContext = oldContext}) - return contents - -orderedList :: PandocMonad m => MarkdownParser m (F Blocks) -orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart - unless (style `elem` [DefaultStyle, Decimal, Example] && - delim `elem` [DefaultDelim, Period]) $ - guardEnabled Ext_fancy_lists - when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem - ( try $ do - optional newline -- if preceded by Plain block in a list - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - res <- orderedListMarker style delim - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res ) - start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify items - -bulletList :: PandocMonad m => MarkdownParser m (F Blocks) -bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify items - --- definition lists - -defListMarker :: PandocMonad m => MarkdownParser m () -defListMarker = do - sps <- nonindentSpaces - char ':' <|> char '~' - tabStop <- getOption readerTabStop - let remaining = tabStop - (length sps + 1) - if remaining > 0 - then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar - else mzero - return () - -definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks])) -definitionListItem compact = try $ do - rawLine' <- anyLine - raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' - contents <- mapM (parseFromString parseBlocks . (++"\n")) raw - optional blanklines - return $ liftM2 (,) term (sequence contents) - -defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String -defRawBlock compact = try $ do - hasBlank <- option False $ blankline >> return True - defListMarker - firstline <- anyLine - let dline = try - ( do notFollowedBy blankline - notFollowedByHtmlCloser - if compact -- laziness not compatible with compact - then () <$ indentSpaces - else (() <$ indentSpaces) - <|> notFollowedBy defListMarker - anyLine ) - rawlines <- many dline - cont <- liftM concat $ many $ try $ do - trailing <- option "" blanklines - ln <- indentSpaces >> notFollowedBy blankline >> anyLine - lns <- many dline - return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ - if hasBlank || not (null cont) then "\n\n" else "" - -definitionList :: PandocMonad m => MarkdownParser m (F Blocks) -definitionList = try $ do - lookAhead (anyLine >> - optional (blankline >> notFollowedBy (table >> return ())) >> - -- don't capture table caption as def list! - defListMarker) - compactDefinitionList <|> normalDefinitionList - -compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) -compactDefinitionList = do - guardEnabled Ext_compact_definition_lists - items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactifyDL items - -normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks) -normalDefinitionList = do - guardEnabled Ext_definition_lists - items <- fmap sequence $ many1 $ definitionListItem False - return $ B.definitionList <$> items - --- --- paragraph block --- - -para :: PandocMonad m => MarkdownParser m (F Blocks) -para = try $ do - exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline - option (B.plain <$> result) - $ try $ do - newline - (blanklines >> return mempty) - <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) - <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) - <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) - <|> (guardEnabled Ext_lists_without_preceding_blankline >> - -- Avoid creating a paragraph in a nested list. - notFollowedBy' inList >> - () <$ lookAhead listStart) - <|> do guardEnabled Ext_native_divs - inHtmlBlock <- stateInHtmlBlock <$> getState - case inHtmlBlock of - Just "div" -> () <$ - lookAhead (htmlTag (~== TagClose "div")) - _ -> mzero - return $ do - result' <- result - case B.toList result' of - [Image attr alt (src,tit)] - | Ext_implicit_figures `extensionEnabled` exts -> - -- the fig: at beginning of title indicates a figure - return $ B.para $ B.singleton - $ Image attr alt (src,'f':'i':'g':':':tit) - _ -> return $ B.para result' - -plain :: PandocMonad m => MarkdownParser m (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline - --- --- raw html --- - -htmlElement :: PandocMonad m => MarkdownParser m String -htmlElement = rawVerbatimBlock - <|> strictHtmlBlock - <|> liftM snd (htmlTag isBlockTag) - -htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks) -htmlBlock = do - guardEnabled Ext_raw_html - try (do - (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag - (guard (t `elem` ["pre","style","script"]) >> - (return . B.rawBlock "html") <$> rawVerbatimBlock) - <|> (do guardEnabled Ext_markdown_attribute - oldMarkdownAttribute <- stateMarkdownAttribute <$> getState - markdownAttribute <- - case lookup "markdown" attrs of - Just "0" -> False <$ updateState (\st -> st{ - stateMarkdownAttribute = False }) - Just _ -> True <$ updateState (\st -> st{ - stateMarkdownAttribute = True }) - Nothing -> return oldMarkdownAttribute - res <- if markdownAttribute - then rawHtmlBlocks - else htmlBlock' - updateState $ \st -> st{ stateMarkdownAttribute = - oldMarkdownAttribute } - return res) - <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) - <|> htmlBlock' - -htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks) -htmlBlock' = try $ do - first <- htmlElement - skipMany spaceChar - optional blanklines - return $ return $ B.rawBlock "html" first - -strictHtmlBlock :: PandocMonad m => MarkdownParser m String -strictHtmlBlock = htmlInBalanced (not . isInlineTag) - -rawVerbatimBlock :: PandocMonad m => MarkdownParser m String -rawVerbatimBlock = htmlInBalanced isVerbTag - where isVerbTag (TagOpen "pre" _) = True - isVerbTag (TagOpen "style" _) = True - isVerbTag (TagOpen "script" _) = True - isVerbTag _ = False - -rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) -rawTeXBlock = do - guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) - <|> (B.rawBlock "context" . concat <$> - rawConTeXtEnvironment `sepEndBy1` blankline) - spaces - return $ return result - -rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) -rawHtmlBlocks = do - (TagOpen tagtype _, raw) <- htmlTag isBlockTag - -- try to find closing tag - -- we set stateInHtmlBlock so that closing tags that can be either block or - -- inline will not be parsed as inline tags - oldInHtmlBlock <- stateInHtmlBlock <$> getState - updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } - let closer = htmlTag (\x -> x ~== TagClose tagtype) - contents <- mconcat <$> many (notFollowedBy' closer >> block) - result <- - (closer >>= \(_, rawcloser) -> return ( - return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> - contents <> - return (B.rawBlock "html" rawcloser))) - <|> return (return (B.rawBlock "html" raw) <> contents) - updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } - return result - --- remove markdown="1" attribute -stripMarkdownAttribute :: String -> String -stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s - where filterAttrib (TagOpen t as) = TagOpen t - [(k,v) | (k,v) <- as, k /= "markdown"] - filterAttrib x = x - --- --- line block --- - -lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) -lineBlock = try $ do - guardEnabled Ext_line_blocks - lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) - return $ B.lineBlock <$> sequence lines' - --- --- Tables --- - --- Parse a dashed line with optional trailing spaces; return its length --- and the length including trailing space. -dashedLine :: PandocMonad m - => Char - -> ParserT [Char] st m (Int, Int) -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - let lengthDashes = length dashes - lengthSp = length sp - return (lengthDashes, lengthDashes + lengthSp) - --- Parse a table header with dashed lines of '-' preceded by --- one (or zero) line of text. -simpleTableHeader :: PandocMonad m - => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -simpleTableHeader headless = try $ do - rawContent <- if headless - then return "" - else anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' - -- If no header, calculate alignment on basis of first row of text - rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ - if headless - then lookAhead anyLine - else return rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - let rawHeads' = if headless - then replicate (length dashes) "" - else rawHeads - heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' - return (heads, aligns, indices) - --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] - -> Int - -> Alignment -alignType [] _ = AlignDefault -alignType strLst len = - let nonempties = filter (not . null) $ map trimr strLst - (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - --- Parse a table footer - dashed lines followed by blank line. -tableFooter :: PandocMonad m => MarkdownParser m String -tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines - --- Parse a table separator - dashed line. -tableSep :: PandocMonad m => MarkdownParser m Char -tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' - --- Parse a raw line and split it into chunks by indices. -rawTableLine :: PandocMonad m - => [Int] - -> MarkdownParser m [String] -rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map trim $ tail $ - splitStringByIndices (init indices) line - --- Parse a table line and return a list of lists of blocks (columns). -tableLine :: PandocMonad m - => [Int] - -> MarkdownParser m (F [Blocks]) -tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) - --- Parse a multiline table row and return a list of blocks (columns). -multilineRow :: PandocMonad m - => [Int] - -> MarkdownParser m (F [Blocks]) -multilineRow indices = do - colLines <- many1 (rawTableLine indices) - let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols - --- Parses a table caption: inlines beginning with 'Table:' --- and followed by blank lines. -tableCaption :: PandocMonad m => MarkdownParser m (F Inlines) -tableCaption = try $ do - guardEnabled Ext_table_captions - skipNonindentSpaces - string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines - --- Parse a simple table with '---' header and one line per row. -simpleTable :: PandocMonad m - => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -simpleTable headless = do - (aligns, _widths, heads', lines') <- - tableWith (simpleTableHeader headless) tableLine - (return ()) - (if headless then tableFooter else tableFooter <|> blanklines) - -- Simple tables get 0s for relative column widths (i.e., use default) - return (aligns, replicate (length aligns) 0, heads', lines') - --- Parse a multiline table: starts with row of '-' on top, then header --- (which may be multiline), then the rows, --- which may be multiline, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -multilineTable :: PandocMonad m - => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter - -multilineTableHeader :: PandocMonad m - => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -multilineTableHeader headless = try $ do - unless headless $ - tableSep >> notFollowedBy blankline - rawContent <- if headless - then return $ repeat "" - else many1 $ notFollowedBy tableSep >> anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' - rawHeadsList <- if headless - then liftM (map (:[]) . tail . - splitStringByIndices (init indices)) $ lookAhead anyLine - else return $ transpose $ map - (tail . splitStringByIndices (init indices)) - rawContent - let aligns = zipWith alignType rawHeadsList lengths - let rawHeads = if headless - then replicate (length dashes) "" - else map (unlines . map trim) rawHeadsList - heads <- fmap sequence $ - mapM (parseFromString (mconcat <$> many plain)) $ - map trim rawHeads - return (heads, aligns, indices) - --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -gridTable :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line - -gridPart :: PandocMonad m => Char -> ParserT [Char] st m ((Int, Int), Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return ((lengthDashes, lengthDashes + 1), alignment) - -gridDashedLines :: PandocMonad m => Char -> ParserT [Char] st m [((Int, Int), Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse - --- | Separator between rows of grid table. -gridTableSep :: PandocMonad m => Char -> MarkdownParser m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m (F [Blocks], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return [] - else many1 (try (char '|' >> anyLine)) - underDashes <- if headless - then return dashes - else gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map (snd . fst) underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: PandocMonad m => [Int] -> MarkdownParser m [String] -gridTableRawLine indices = do - char '|' - line <- anyLine - return (gridTableSplitLine indices line) - --- | Parse row of grid table. -gridTableRow :: PandocMonad m => [Int] - -> MarkdownParser m (F [Blocks]) -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - fmap compactify <$> fmap sequence (mapM (parseFromString parseBlocks) cols) - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- | Parse footer for a grid table. -gridTableFooter :: PandocMonad m => MarkdownParser m [Char] -gridTableFooter = blanklines - -pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) -pipeBreak = try $ do - nonindentSpaces - openPipe <- (True <$ char '|') <|> return False - first <- pipeTableHeaderPart - rest <- many $ sepPipe *> pipeTableHeaderPart - -- surrounding pipes needed for a one-column table: - guard $ not (null rest && not openPipe) - optional (char '|') - blankline - return $ unzip (first:rest) - -pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -pipeTable = try $ do - nonindentSpaces - lookAhead nonspaceChar - (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak - let heads' = take (length aligns) <$> heads - lines' <- many pipeTableRow - let lines'' = map (take (length aligns) <$>) lines' - let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads' : lines'') - numColumns <- getOption readerColumns - let widths = if maxlength > numColumns - then map (\len -> - fromIntegral (len + 1) / fromIntegral numColumns) - seplengths - else replicate (length aligns) 0.0 - return $ (aligns, widths, heads', sequence lines'') - -sepPipe :: PandocMonad m => MarkdownParser m () -sepPipe = try $ do - char '|' <|> char '+' - notFollowedBy blankline - --- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks]) -pipeTableRow = try $ do - scanForPipe - skipMany spaceChar - openPipe <- (True <$ char '|') <|> return False - -- split into cells - let chunk = void (code <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline') - <|> void (noneOf "|\n\r") - let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>= - parseFromString pipeTableCell - cells <- cellContents `sepEndBy1` (char '|') - -- surrounding pipes needed for a one-column table: - guard $ not (length cells == 1 && not openPipe) - blankline - return $ sequence cells - -pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks) -pipeTableCell = do - result <- many inline - if null result - then return mempty - else return $ B.plain . mconcat <$> sequence result - -pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) -pipeTableHeaderPart = try $ do - skipMany spaceChar - left <- optionMaybe (char ':') - pipe <- many1 (char '-') - right <- optionMaybe (char ':') - skipMany spaceChar - let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right - return $ - ((case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter), len) - --- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT [Char] st m () -scanForPipe = do - inp <- getInput - case break (\c -> c == '\n' || c == '|') inp of - (_,'|':_) -> return () - _ -> mzero - --- | Parse a table using 'headerParser', 'rowParser', --- 'lineParser', and 'footerParser'. Variant of the version in --- Text.Pandoc.Parsing. -tableWith :: PandocMonad m - => MarkdownParser m (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser m (F [Blocks])) - -> MarkdownParser m sep - -> MarkdownParser m end - -> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]]) -tableWith headerParser rowParser lineParser footerParser = try $ do - (heads, aligns, indices) <- headerParser - lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser - footerParser - numColumns <- getOption readerColumns - let widths = if (indices == []) - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') - -table :: PandocMonad m => MarkdownParser m (F Blocks) -table = try $ do - frontCaption <- option Nothing (Just <$> tableCaption) - (aligns, widths, heads, lns) <- - try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|> - try (guardEnabled Ext_multiline_tables >> - multilineTable False) <|> - try (guardEnabled Ext_simple_tables >> - (simpleTable True <|> simpleTable False)) <|> - try (guardEnabled Ext_multiline_tables >> - multilineTable True) <|> - try (guardEnabled Ext_grid_tables >> - (gridTable False <|> gridTable True)) <?> "table" - optional blanklines - caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption - Just c -> return c - -- renormalize widths if greater than 100%: - let totalWidth = sum widths - let widths' = if totalWidth < 1 - then widths - else map (/ totalWidth) widths - return $ do - caption' <- caption - heads' <- heads - lns' <- lns - return $ B.table caption' (zip aligns widths') heads' lns' - --- --- inline --- - -inline :: PandocMonad m => MarkdownParser m (F Inlines) -inline = choice [ whitespace - , bareURL - , str - , endline - , code - , strongOrEmph - , note - , cite - , bracketedSpan - , link - , image - , math - , strikeout - , subscript - , superscript - , inlineNote -- after superscript because of ^[link](/foo)^ - , autoLink - , spanHtml - , rawHtmlInline - , escapedChar - , rawLaTeXInline' - , exampleRef - , smart - , return . B.singleton <$> charRef - , emoji - , symbol - , ltSign - ] <?> "inline" - -escapedChar' :: PandocMonad m => MarkdownParser m Char -escapedChar' = try $ do - char '\\' - (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) - <|> (guardEnabled Ext_angle_brackets_escapable >> - oneOf "\\`*_{}[]()>#+-.!~\"<>") - <|> (guardEnabled Ext_escaped_line_breaks >> char '\n') - <|> oneOf "\\`*_{}[]()>#+-.!~\"" - -escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) -escapedChar = do - result <- escapedChar' - case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] - -ltSign :: PandocMonad m => MarkdownParser m (F Inlines) -ltSign = do - guardDisabled Ext_raw_html - <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) - char '<' - return $ return $ B.str "<" - -exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) -exampleRef = try $ do - guardEnabled Ext_example_lists - char '@' - lab <- many1 (alphaNum <|> oneOf "-_") - return $ do - st <- askF - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) - -symbol :: PandocMonad m => MarkdownParser m (F Inlines) -symbol = do - result <- noneOf "<\\\n\t " - <|> try (do lookAhead $ char '\\' - notFollowedBy' (() <$ rawTeXBlock) - char '\\') - return $ return $ B.str [result] - --- parses inline code, between n `s and n `s -code :: PandocMonad m => MarkdownParser m (F Inlines) -code = try $ do - starts <- many1 (char '`') - skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> notFollowedBy' blankline >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> - notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes - >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result - -math :: PandocMonad m => MarkdownParser m (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> - (guardEnabled Ext_smart *> (return <$> apostrophe) - <* notFollowedBy (space <|> satisfy isPunctuation)) - --- Parses material enclosed in *s, **s, _s, or __s. --- Designed to avoid backtracking. -enclosure :: PandocMonad m - => Char - -> MarkdownParser m (F Inlines) -enclosure c = do - -- we can't start an enclosure with _ if after a string and - -- the intraword_underscores extension is enabled: - guardDisabled Ext_intraword_underscores - <|> guard (c == '*') - <|> (guard =<< notAfterString) - cs <- many1 (char c) - (return (B.str cs) <>) <$> whitespace - <|> do - case length cs of - 3 -> three c - 2 -> two c mempty - 1 -> one c mempty - _ -> return (return $ B.str cs) - -ender :: PandocMonad m => Char -> Int -> MarkdownParser m () -ender c n = try $ do - count n (char c) - guard (c == '*') - <|> guardDisabled Ext_intraword_underscores - <|> notFollowedBy alphaNum - --- Parse inlines til you hit one c or a sequence of two cs. --- If one c, emit emph and then parse two. --- If two cs, emit strong and then parse one. --- Otherwise, emit ccc then the results. -three :: PandocMonad m => Char -> MarkdownParser m (F Inlines) -three c = do - contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) - --- Parse inlines til you hit two c's, and emit strong. --- If you never do hit two cs, emit ** plus inlines parsed. -two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) -two c prefix' = do - contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) - --- Parse inlines til you hit a c, and emit emph. --- If you never hit a c, emit * plus inlines parsed. -one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines) -one c prefix' = do - contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) - <|> try (string [c,c] >> - notFollowedBy (ender c 1) >> - two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) - -strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) -strongOrEmph = enclosure '*' <|> enclosure '_' - --- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: PandocMonad m - => (Show b) - => MarkdownParser m a - -> MarkdownParser m b - -> MarkdownParser m (F Inlines) -inlinesBetween start end = - (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) - innerSpace = try $ whitespace <* notFollowedBy' end - -strikeout :: PandocMonad m => MarkdownParser m (F Inlines) -strikeout = fmap B.strikeout <$> - (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) - where strikeStart = string "~~" >> lookAhead nonspaceChar - >> notFollowedBy (char '~') - strikeEnd = try $ string "~~" - -superscript :: PandocMonad m => MarkdownParser m (F Inlines) -superscript = fmap B.superscript <$> try (do - guardEnabled Ext_superscript - char '^' - mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) - -subscript :: PandocMonad m => MarkdownParser m (F Inlines) -subscript = fmap B.subscript <$> try (do - guardEnabled Ext_subscript - char '~' - mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) - -whitespace :: PandocMonad m => MarkdownParser m (F Inlines) -whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" - where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) - regsp = skipMany spaceChar >> return B.space - -nonEndline :: PandocMonad m => ParserT [Char] st m Char -nonEndline = satisfy (/='\n') - -str :: PandocMonad m => MarkdownParser m (F Inlines) -str = do - result <- many1 alphaNum - updateLastStrPos - let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if isSmart - then case likelyAbbrev result of - [] -> return $ return $ B.str result - xs -> choice (map (\x -> - try (string x >> oneOf " \n" >> - lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result - --- | if the string matches the beginning of an abbreviation (before --- the first period, return strings that would finish the abbreviation. -likelyAbbrev :: String -> [String] -likelyAbbrev x = - let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", - "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", - "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", - "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", - "ch.", "sec.", "cf.", "cp."] - abbrPairs = map (break (=='.')) abbrevs - in map snd $ filter (\(y,_) -> y == x) abbrPairs - --- an endline character that can be treated as a space, not a structural break -endline :: PandocMonad m => MarkdownParser m (F Inlines) -endline = try $ do - newline - notFollowedBy blankline - -- parse potential list-starts differently if in a list: - notFollowedBy (inList >> listStart) - guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart - guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart - guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header - guardDisabled Ext_backtick_code_blocks <|> - notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - notFollowedByHtmlCloser - (eof >> return mempty) - <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) - <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (skipMany spaceChar >> return (return B.softbreak)) - --- --- links --- - --- a reference label for a link -reference :: PandocMonad m => MarkdownParser m (F Inlines, String) -reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets - -parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] -parenthesizedChars = do - result <- charsInBalanced '(' ')' litChar - return $ '(' : result ++ ")" - --- source for a link, with optional title -source :: PandocMonad m => MarkdownParser m (String, String) -source = do - char '(' - skipSpaces - let urlChunk = - try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> (count 1 litChar)) - <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) - let sourceURL = (unwords . words . concat) <$> many urlChunk - let betweenAngles = try $ - char '<' >> manyTill litChar (char '>') - src <- try betweenAngles <|> sourceURL - tit <- option "" $ try $ spnl >> linkTitle - skipSpaces - char ')' - return (escapeURI $ trimr src, tit) - -linkTitle :: PandocMonad m => MarkdownParser m String -linkTitle = quotedTitle '"' <|> quotedTitle '\'' - -link :: PandocMonad m => MarkdownParser m (F Inlines) -link = try $ do - st <- getState - guard $ stateAllowLinks st - setState $ st{ stateAllowLinks = False } - (lab,raw) <- reference - setState $ st{ stateAllowLinks = True } - regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) - -bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines) -bracketedSpan = try $ do - guardEnabled Ext_bracketed_spans - (lab,_) <- reference - attr <- attributes - let (ident,classes,keyvals) = attr - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> lab - _ -> return $ B.spanWith attr <$> lab - -regLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> F Inlines - -> MarkdownParser m (F Inlines) -regLink constructor lab = try $ do - (src, tit) <- source - attr <- option nullAttr $ - guardEnabled Ext_link_attributes >> attributes - return $ constructor attr src tit <$> lab - --- a link like [this][ref] or [this][] or [this] -referenceLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) - -> MarkdownParser m (F Inlines) -referenceLink constructor (lab, raw) = do - sp <- (True <$ lookAhead (char ' ')) <|> return False - (_,raw') <- option (mempty, "") $ - lookAhead (try (guardEnabled Ext_citations >> - spnl >> normalCite >> return (mempty, ""))) - <|> - try (spnl >> reference) - when (raw' == "") $ guardEnabled Ext_shortcut_reference_links - let labIsRef = raw' == "" || raw' == "[]" - let key = toKey $ if labIsRef then raw else raw' - parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw - implicitHeaderRefs <- option False $ - True <$ guardEnabled Ext_implicit_header_references - let makeFallback = do - parsedRaw' <- parsedRaw - fallback' <- fallback - return $ B.str "[" <> fallback' <> B.str "]" <> - (if sp && not (null raw) then B.space else mempty) <> - parsedRaw' - return $ do - keys <- asksF stateKeys - case M.lookup key keys of - Nothing -> - if implicitHeaderRefs - then do - headerKeys <- asksF stateHeaderKeys - case M.lookup key headerKeys of - Just ((src, tit), _) -> constructor nullAttr src tit <$> lab - Nothing -> makeFallback - else makeFallback - Just ((src,tit), attr) -> constructor attr src tit <$> lab - -dropBrackets :: String -> String -dropBrackets = reverse . dropRB . reverse . dropLB - where dropRB (']':xs) = xs - dropRB xs = xs - dropLB ('[':xs) = xs - dropLB xs = xs - -bareURL :: PandocMonad m => MarkdownParser m (F Inlines) -bareURL = try $ do - guardEnabled Ext_autolink_bare_uris - getState >>= guard . stateAllowLinks - (orig, src) <- uri <|> emailAddress - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ return $ B.link src "" (B.str orig) - -autoLink :: PandocMonad m => MarkdownParser m (F Inlines) -autoLink = try $ do - getState >>= guard . stateAllowLinks - char '<' - (orig, src) <- uri <|> emailAddress - -- in rare cases, something may remain after the uri parser - -- is finished, because the uri parser tries to avoid parsing - -- final punctuation. for example: in `<http://hi---there>`, - -- the URI parser will stop before the dashes. - extra <- fromEntities <$> manyTill nonspaceChar (char '>') - attr <- option nullAttr $ try $ - guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) - -image :: PandocMonad m => MarkdownParser m (F Inlines) -image = try $ do - char '!' - (lab,raw) <- reference - defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension src of - "" -> B.imageWith attr' (addExtension src defaultExt) - _ -> B.imageWith attr' src - regLink constructor lab <|> referenceLink constructor (lab,raw) - -note :: PandocMonad m => MarkdownParser m (F Inlines) -note = try $ do - guardEnabled Ext_footnotes - ref <- noteMarker - return $ do - notes <- asksF stateNotes' - case lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do - st <- askF - -- process the note in a context that doesn't resolve - -- notes, to avoid infinite looping with notes inside - -- notes: - let contents' = runF contents st{ stateNotes' = [] } - return $ B.note contents' - -inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) -inlineNote = try $ do - guardEnabled Ext_inline_notes - char '^' - contents <- inlinesInBalancedBrackets - return $ B.note . B.para <$> contents - -rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines) -rawLaTeXInline' = try $ do - guardEnabled Ext_raw_tex - lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s - -- "tex" because it might be context or latex - -rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String -rawConTeXtEnvironment = try $ do - string "\\start" - completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> (many1 letter) - contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar)) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion - -inBrackets :: PandocMonad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String -inBrackets parser = do - char '[' - contents <- many parser - char ']' - return $ "[" ++ contents ++ "]" - -spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) -spanHtml = try $ do - guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - case lookup "style" keyvals of - Just s | null ident && null classes && - map toLower (filter (`notElem` " \t;") s) == - "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents - -divHtml :: PandocMonad m => MarkdownParser m (F Blocks) -divHtml = try $ do - guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) - -- we set stateInHtmlBlock so that closing tags that can be either block or - -- inline will not be parsed as inline tags - oldInHtmlBlock <- stateInHtmlBlock <$> getState - updateState $ \st -> st{ stateInHtmlBlock = Just "div" } - bls <- option "" (blankline >> option "" blanklines) - contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) - closed <- option False (True <$ htmlTag (~== TagClose "div")) - if closed - then do - updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents - else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents - -rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines) -rawHtmlInline = do - guardEnabled Ext_raw_html - inHtmlBlock <- stateInHtmlBlock <$> getState - let isCloseBlockTag t = case inHtmlBlock of - Just t' -> t ~== TagClose t' - Nothing -> False - mdInHtml <- option False $ - ( guardEnabled Ext_markdown_in_html_blocks - <|> guardEnabled Ext_markdown_attribute - ) >> return True - (_,result) <- htmlTag $ if mdInHtml - then (\x -> isInlineTag x && - not (isCloseBlockTag x)) - else not . isTextTag - return $ return $ B.rawInline "html" result - --- Emoji - -emojiChars :: [Char] -emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] - -emoji :: PandocMonad m => MarkdownParser m (F Inlines) -emoji = try $ do - guardEnabled Ext_emoji - char ':' - emojikey <- many1 (oneOf emojiChars) - char ':' - case M.lookup emojikey emojis of - Just s -> return (return (B.str s)) - Nothing -> mzero - --- Citations - -cite :: PandocMonad m => MarkdownParser m (F Inlines) -cite = do - guardEnabled Ext_citations - citations <- textualCite - <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs - return citations - -textualCite :: PandocMonad m => MarkdownParser m (F Inlines) -textualCite = try $ do - (_, key) <- citeKey - let first = Citation{ citationId = key - , citationPrefix = [] - , citationSuffix = [] - , citationMode = AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite - case mbrest of - Just (rest, raw) -> - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) - <$> rest - Nothing -> - (do - (cs, raw) <- withRaw $ bareloc first - let (spaces',raw') = span isSpace raw - spc | null spaces' = mempty - | otherwise = B.space - lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' - fallback <- referenceLink B.linkWith (lab,raw') - return $ do - fallback' <- fallback - cs' <- cs - return $ - case B.toList fallback' of - Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' - _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) - <|> return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) - -bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) -bareloc c = try $ do - spnl - char '[' - notFollowedBy $ char '^' - suff <- suffix - rest <- option (return []) $ try $ char ';' >> citeList - spnl - char ']' - notFollowedBy $ oneOf "[(" - return $ do - suff' <- suff - rest' <- rest - return $ c{ citationSuffix = B.toList suff' } : rest' - -normalCite :: PandocMonad m => MarkdownParser m (F [Citation]) -normalCite = try $ do - char '[' - spnl - citations <- citeList - spnl - char ']' - return citations - -suffix :: PandocMonad m => MarkdownParser m (F Inlines) -suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - spnl - rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - -prefix :: PandocMonad m => MarkdownParser m (F Inlines) -prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) - -citeList :: PandocMonad m => MarkdownParser m (F [Citation]) -citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) - -citation :: PandocMonad m => MarkdownParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey - suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: PandocMonad m => MarkdownParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [apostrophe, dash, ellipses]) - -singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd - --- doubleQuoted will handle regular double-quoted sections, as well --- as dialogues with an open double-quote without a close double-quote --- in the same paragraph. -doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) -doubleQuoted = try $ do - doubleQuoteStart - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs deleted file mode 100644 index 14f9da9b6..000000000 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ /dev/null @@ -1,677 +0,0 @@ -{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} --- RelaxedPolyRec needed for inlinesBetween on GHC < 7 -{- - Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of mediawiki text to 'Pandoc' document. --} -{- -TODO: -_ correctly handle tables within tables -_ parse templates? --} -module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where - -import Text.Pandoc.Definition -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import Data.Monoid ((<>)) -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) -import Text.Pandoc.XML ( fromEntities ) -import Text.Pandoc.Parsing hiding ( nested ) -import Text.Pandoc.Walk ( walk ) -import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) -import Control.Monad -import Data.List (intersperse, intercalate, isPrefixOf ) -import Text.HTML.TagSoup -import Data.Sequence (viewl, ViewL(..), (<|)) -import qualified Data.Foldable as F -import qualified Data.Map as M -import qualified Data.Set as Set -import Data.Char (isDigit, isSpace) -import Data.Maybe (fromMaybe) -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, report) - --- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readMediaWiki opts s = do - parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts - , mwMaxNestingLevel = 4 - , mwNextLinkNumber = 1 - , mwCategoryLinks = [] - , mwHeaderMap = M.empty - , mwIdentifierList = Set.empty - } - (s ++ "\n") - case parsed of - Right result -> return result - Left e -> throwError e - -data MWState = MWState { mwOptions :: ReaderOptions - , mwMaxNestingLevel :: Int - , mwNextLinkNumber :: Int - , mwCategoryLinks :: [Inlines] - , mwHeaderMap :: M.Map Inlines String - , mwIdentifierList :: Set.Set String - } - -type MWParser m = ParserT [Char] MWState m - -instance HasReaderOptions MWState where - extractReaderOptions = mwOptions - -instance HasHeaderMap MWState where - extractHeaderMap = mwHeaderMap - updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st } - -instance HasIdentifierList MWState where - extractIdentifierList = mwIdentifierList - updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st } - --- --- auxiliary functions --- - --- This is used to prevent exponential blowups for things like: --- ''a'''a''a'''a''a'''a''a'''a -nested :: PandocMonad m => MWParser m a -> MWParser m a -nested p = do - nestlevel <- mwMaxNestingLevel `fmap` getState - guard $ nestlevel > 0 - updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ mwMaxNestingLevel = nestlevel } - return res - -specialChars :: [Char] -specialChars = "'[]<=&*{}|\":\\" - -spaceChars :: [Char] -spaceChars = " \n\t" - -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) - -newBlockTags :: [String] -newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] - -isBlockTag' :: Tag String -> Bool -isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && - t `notElem` eitherBlockOrInline -isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && - t `notElem` eitherBlockOrInline -isBlockTag' tag = isBlockTag tag - -isInlineTag' :: Tag String -> Bool -isInlineTag' (TagComment _) = True -isInlineTag' t = not (isBlockTag' t) - -eitherBlockOrInline :: [String] -eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", - "map", "area", "object"] - -htmlComment :: PandocMonad m => MWParser m () -htmlComment = () <$ htmlTag isCommentTag - -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines -inlinesInTags tag = try $ do - (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag - then return mempty - else trimInlines . mconcat <$> - manyTill inline (htmlTag (~== TagClose tag)) - -blocksInTags :: PandocMonad m => String -> MWParser m Blocks -blocksInTags tag = try $ do - (_,raw) <- htmlTag (~== TagOpen tag []) - let closer = if tag == "li" - then htmlTag (~== TagClose "li") - <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) - else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag - then return mempty - else mconcat <$> manyTill block closer - -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do - (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag - then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) - --- --- main parser --- - -parseMediaWiki :: PandocMonad m => MWParser m Pandoc -parseMediaWiki = do - bs <- mconcat <$> many block - spaces - eof - categoryLinks <- reverse . mwCategoryLinks <$> getState - let categories = if null categoryLinks - then mempty - else B.para $ mconcat $ intersperse B.space categoryLinks - return $ B.doc $ bs <> categories - --- --- block parsers --- - -block :: PandocMonad m => MWParser m Blocks -block = do - pos <- getPosition - res <- mempty <$ skipMany1 blankline - <|> table - <|> header - <|> hrule - <|> orderedList - <|> bulletList - <|> definitionList - <|> mempty <$ try (spaces *> htmlComment) - <|> preformatted - <|> blockTag - <|> (B.rawBlock "mediawiki" <$> template) - <|> para - report $ ParsingTrace (take 60 $ show $ B.toList res) pos - return res - -para :: PandocMonad m => MWParser m Blocks -para = do - contents <- trimInlines . mconcat <$> many1 inline - if F.all (==Space) contents - then return mempty - else return $ B.para contents - -table :: PandocMonad m => MWParser m Blocks -table = do - tableStart - styles <- option [] parseAttrs <* blankline - let tableWidth = case lookup "width" styles of - Just w -> fromMaybe 1.0 $ parseWidth w - Nothing -> 1.0 - caption <- option mempty tableCaption - optional rowsep - hasheader <- option False $ True <$ (lookAhead (skipSpaces *> char '!')) - (cellspecs',hdr) <- unzip <$> tableRow - let widths = map ((tableWidth *) . snd) cellspecs' - let restwidth = tableWidth - sum widths - let zerocols = length $ filter (==0.0) widths - let defaultwidth = if zerocols == 0 || zerocols == length widths - then 0.0 - else restwidth / fromIntegral zerocols - let widths' = map (\w -> if w == 0 then defaultwidth else w) widths - let cellspecs = zip (map fst cellspecs') widths' - rows' <- many $ try $ rowsep *> (map snd <$> tableRow) - optional blanklines - tableEnd - let cols = length hdr - let (headers,rows) = if hasheader - then (hdr, rows') - else (replicate cols mempty, hdr:rows') - return $ B.table caption cellspecs headers rows - -parseAttrs :: PandocMonad m => MWParser m [(String,String)] -parseAttrs = many1 parseAttr - -parseAttr :: PandocMonad m => MWParser m (String, String) -parseAttr = try $ do - skipMany spaceChar - k <- many1 letter - char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') - return (k,v) - -tableStart :: PandocMonad m => MWParser m () -tableStart = try $ guardColumnOne *> skipSpaces *> sym "{|" - -tableEnd :: PandocMonad m => MWParser m () -tableEnd = try $ guardColumnOne *> skipSpaces *> sym "|}" - -rowsep :: PandocMonad m => MWParser m () -rowsep = try $ guardColumnOne *> skipSpaces *> sym "|-" <* - optional parseAttr <* blanklines - -cellsep :: PandocMonad m => MWParser m () -cellsep = try $ - (guardColumnOne *> skipSpaces <* - ( (char '|' <* notFollowedBy (oneOf "-}+")) - <|> (char '!') - ) - ) - <|> (() <$ try (string "||")) - <|> (() <$ try (string "!!")) - -tableCaption :: PandocMonad m => MWParser m Inlines -tableCaption = try $ do - guardColumnOne - skipSpaces - sym "|+" - optional (try $ parseAttr *> skipSpaces *> char '|' *> skipSpaces) - (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) - -tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] -tableRow = try $ skipMany htmlComment *> many tableCell - -tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) -tableCell = try $ do - cellsep - skipMany spaceChar - attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* - notFollowedBy (char '|') - skipMany spaceChar - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) - bs <- parseFromString (mconcat <$> many block) ls - let align = case lookup "align" attrs of - Just "left" -> AlignLeft - Just "right" -> AlignRight - Just "center" -> AlignCenter - _ -> AlignDefault - let width = case lookup "width" attrs of - Just xs -> fromMaybe 0.0 $ parseWidth xs - Nothing -> 0.0 - return ((align, width), bs) - -parseWidth :: String -> Maybe Double -parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing - -template :: PandocMonad m => MWParser m String -template = try $ do - string "{{" - notFollowedBy (char '{') - lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar - contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" - -blockTag :: PandocMonad m => MWParser m Blocks -blockTag = do - (tag, _) <- lookAhead $ htmlTag isBlockTag' - case tag of - TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" - TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs - TagOpen "source" attrs -> syntaxhighlight "source" attrs - TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" - TagOpen "gallery" _ -> blocksInTags "gallery" - TagOpen "p" _ -> mempty <$ htmlTag (~== tag) - TagClose "p" -> mempty <$ htmlTag (~== tag) - _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) - -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs - -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks -syntaxhighlight tag attrs = try $ do - let mblang = lookup "lang" attrs - let mbstart = lookup "start" attrs - let mbline = lookup "line" attrs - let classes = maybe [] (:[]) mblang ++ maybe [] (const ["numberLines"]) mbline - let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag - return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents - -hrule :: PandocMonad m => MWParser m Blocks -hrule = B.horizontalRule <$ try (string "----" *> many (char '-') *> newline) - -guardColumnOne :: PandocMonad m => MWParser m () -guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) - -preformatted :: PandocMonad m => MWParser m Blocks -preformatted = try $ do - guardColumnOne - char ' ' - let endline' = B.linebreak <$ (try $ newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) - let spToNbsp ' ' = '\160' - spToNbsp x = x - let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) - let inline' = whitespace' <|> endline' <|> nowiki' - <|> (try $ notFollowedBy newline *> inline) - contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs - spacesStr _ = False - if F.all spacesStr contents - then return mempty - else return $ B.para $ encode contents - -encode :: Inlines -> Inlines -encode = B.fromList . normalizeCode . B.toList . walk strToCode - where strToCode (Str s) = Code ("",[],[]) s - strToCode Space = Code ("",[],[]) " " - strToCode x = x - normalizeCode [] = [] - normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ (Code a1 (x ++ y)) : zs - normalizeCode (x:xs) = x : normalizeCode xs - -header :: PandocMonad m => MWParser m Blocks -header = try $ do - guardColumnOne - eqs <- many1 (char '=') - let lev = length eqs - guard $ lev <= 6 - contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') - attr <- registerHeader nullAttr contents - return $ B.headerWith attr lev contents - -bulletList :: PandocMonad m => MWParser m Blocks -bulletList = B.bulletList <$> - ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) - -orderedList :: PandocMonad m => MWParser m Blocks -orderedList = - (B.orderedList <$> many1 (listItem '#')) - <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) - spaces - items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) - let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag - return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) - -definitionList :: PandocMonad m => MWParser m Blocks -definitionList = B.definitionList <$> many1 defListItem - -defListItem :: PandocMonad m => MWParser m (Inlines, [Blocks]) -defListItem = try $ do - terms <- mconcat . intersperse B.linebreak <$> many defListTerm - -- we allow dd with no dt, or dt with no dd - defs <- if B.isNull terms - then notFollowedBy - (try $ skipMany1 (char ':') >> string "<math>") *> - many1 (listItem ':') - else many (listItem ':') - return (terms, defs) - -defListTerm :: PandocMonad m => MWParser m Inlines -defListTerm = char ';' >> skipMany spaceChar >> anyLine >>= - parseFromString (trimInlines . mconcat <$> many inline) - -listStart :: PandocMonad m => Char -> MWParser m () -listStart c = char c *> notFollowedBy listStartChar - -listStartChar :: PandocMonad m => MWParser m Char -listStartChar = oneOf "*#;:" - -anyListStart :: PandocMonad m => MWParser m Char -anyListStart = char '*' - <|> char '#' - <|> char ':' - <|> char ';' - -li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> - (firstParaToPlain <$> blocksInTags "li") <* spaces - -listItem :: PandocMonad m => Char -> MWParser m Blocks -listItem c = try $ do - extras <- many (try $ char c <* lookAhead listStartChar) - if null extras - then listItem' c - else do - skipMany spaceChar - first <- concat <$> manyTill listChunk newline - rest <- many - (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) - contents <- parseFromString (many1 $ listItem' c) - (unlines (first : rest)) - case c of - '*' -> return $ B.bulletList contents - '#' -> return $ B.orderedList contents - ':' -> return $ B.definitionList [(mempty, contents)] - _ -> mzero - --- The point of this is to handle stuff like --- * {{cite book --- | blah --- | blah --- }} --- * next list item --- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar - -listItem' :: PandocMonad m => Char -> MWParser m Blocks -listItem' c = try $ do - listStart c - skipMany spaceChar - first <- concat <$> manyTill listChunk newline - rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) - parseFromString (firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest - -firstParaToPlain :: Blocks -> Blocks -firstParaToPlain contents = - case viewl (B.unMany contents) of - (Para xs) :< ys -> B.Many $ (Plain xs) <| ys - _ -> contents - --- --- inline parsers --- - -inline :: PandocMonad m => MWParser m Inlines -inline = whitespace - <|> url - <|> str - <|> doubleQuotes - <|> strong - <|> emph - <|> image - <|> internalLink - <|> externalLink - <|> math - <|> inlineTag - <|> B.singleton <$> charRef - <|> inlineHtml - <|> (B.rawInline "mediawiki" <$> variable) - <|> (B.rawInline "mediawiki" <$> template) - <|> special - -str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) - -math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) - where dmStart = string "\\[" - dmEnd = try (string "\\]") - mStart = string "\\(" - mEnd = try (string "\\)") - -variable :: PandocMonad m => MWParser m String -variable = try $ do - string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" - -inlineTag :: PandocMonad m => MWParser m Inlines -inlineTag = do - (tag, _) <- lookAhead $ htmlTag isInlineTag' - case tag of - TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" - TagOpen "nowiki" _ -> try $ do - (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw - then return mempty - else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too - *> optional blankline) - TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" - TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" - TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" - TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" - TagOpen "code" _ -> encode <$> inlinesInTags "code" - TagOpen "tt" _ -> encode <$> inlinesInTags "tt" - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" - _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) - -special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) - -inlineHtml :: PandocMonad m => MWParser m Inlines -inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' - -whitespace :: PandocMonad m => MWParser m Inlines -whitespace = B.space <$ (skipMany1 spaceChar <|> htmlComment) - <|> B.softbreak <$ endline - -endline :: PandocMonad m => MWParser m () -endline = () <$ try (newline <* - notFollowedBy spaceChar <* - notFollowedBy newline <* - notFollowedBy' hrule <* - notFollowedBy tableStart <* - notFollowedBy' header <* - notFollowedBy anyListStart) - -imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] - where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", - "Bild"] - -image :: PandocMonad m => MWParser m Inlines -image = try $ do - sym "[[" - choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") - _ <- many imageOption - dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px") - <|> return [] - _ <- many imageOption - let kvs = case dims of - w:[] -> [("width", w)] - w:(h:[]) -> [("width", w), ("height", h)] - _ -> [] - let attr = ("", [], kvs) - caption <- (B.str fname <$ sym "]]") - <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption - -imageOption :: PandocMonad m => MWParser m String -imageOption = try $ char '|' *> opt - where - opt = try (oneOfStrings [ "border", "thumbnail", "frameless" - , "thumb", "upright", "left", "right" - , "center", "none", "baseline", "sub" - , "super", "top", "text-top", "middle" - , "bottom", "text-bottom" ]) - <|> try (string "frame") - <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) - -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words - -internalLink :: PandocMonad m => MWParser m Inlines -internalLink = try $ do - sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") - label <- option (B.text pagename) $ char '|' *> - ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) - -- the "pipe trick" - -- [[Help:Contents|] -> "Contents" - <|> (return $ B.text $ drop 1 $ dropWhile (/=':') pagename) ) - sym "]]" - linktrail <- B.text <$> many letter - let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename - then do - updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } - return mempty - else return link - -externalLink :: PandocMonad m => MWParser m Inlines -externalLink = try $ do - char '[' - (_, src) <- uri - lab <- try (trimInlines . mconcat <$> - (skipMany1 spaceChar *> manyTill inline (char ']'))) - <|> do char ']' - num <- mwNextLinkNumber <$> getState - updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num - return $ B.link src "" lab - -url :: PandocMonad m => MWParser m Inlines -url = do - (orig, src) <- uri - return $ B.link src "" (B.str orig) - --- | Parses a list of inlines between start and end delimiters. -inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines -inlinesBetween start end = - (trimInlines . mconcat) <$> try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) - innerSpace = try $ whitespace <* notFollowedBy' end - -emph :: PandocMonad m => MWParser m Inlines -emph = B.emph <$> nested (inlinesBetween start end) - where start = sym "''" >> lookAhead nonspaceChar - end = try $ notFollowedBy' (() <$ strong) >> sym "''" - -strong :: PandocMonad m => MWParser m Inlines -strong = B.strong <$> nested (inlinesBetween start end) - where start = sym "'''" >> lookAhead nonspaceChar - end = try $ sym "'''" - -doubleQuotes :: PandocMonad m => MWParser m Inlines -doubleQuotes = B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) - where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar - closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs deleted file mode 100644 index 1953c0c83..000000000 --- a/src/Text/Pandoc/Readers/Native.hs +++ /dev/null @@ -1,71 +0,0 @@ -{- -Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; Either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of a string representation of a pandoc type (@Pandoc@, -@[Block]@, @Block@, @[Inline]@, or @Inline@) to a @Pandoc@ document. --} -module Text.Pandoc.Readers.Native ( readNative ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Shared (safeRead) -import Text.Pandoc.Options (ReaderOptions) - -import Control.Monad.Except (throwError) -import Text.Pandoc.Error -import Text.Pandoc.Class - --- | Read native formatted text and return a Pandoc document. --- The input may be a full pandoc document, a block list, a block, --- an inline list, or an inline. Thus, for example, --- --- > Str "hi" --- --- will be treated as if it were --- --- > Pandoc nullMeta [Plain [Str "hi"]] --- -readNative :: PandocMonad m - => ReaderOptions - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of - Right doc -> return doc - Left _ -> throwError $ PandocParseError "couldn't read native" - -readBlocks :: String -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) - -readBlock :: String -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) - -readInlines :: String -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) - -readInline :: String -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s) - diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs deleted file mode 100644 index cec64895c..000000000 --- a/src/Text/Pandoc/Readers/OPML.hs +++ /dev/null @@ -1,103 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -module Text.Pandoc.Readers.OPML ( readOPML ) where -import Data.Char (toUpper) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Readers.Markdown (readMarkdown) -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) -import Data.Generics -import Control.Monad.State -import Data.Default -import Text.Pandoc.Class (PandocMonad) - -type OPML m = StateT OPMLState m - -data OPMLState = OPMLState{ - opmlSectionLevel :: Int - , opmlDocTitle :: Inlines - , opmlDocAuthors :: [Inlines] - , opmlDocDate :: Inlines - } deriving Show - -instance Default OPMLState where - def = OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } - -readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc -readOPML _ inp = do - (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp) - return $ - setTitle (opmlDocTitle st') $ - setAuthors (opmlDocAuthors st') $ - setDate (opmlDocDate st') $ - doc $ mconcat bs - --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) - --- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String -attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" - --- exceptT :: PandocMonad m => Either PandocError a -> OPML m a --- exceptT = either throwError return - -asHtml :: PandocMonad m => String -> OPML m Inlines -asHtml s = - (\(Pandoc _ bs) -> case bs of - [Plain ils] -> fromList ils - _ -> mempty) <$> (lift $ readHtml def s) - -asMarkdown :: PandocMonad m => String -> OPML m Blocks -asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s) - -getBlocks :: PandocMonad m => Element -> OPML m Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) - -parseBlock :: PandocMonad m => Content -> OPML m Blocks -parseBlock (Elem e) = - case qName (elName e) of - "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ strContent e]}) - "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ strContent e}) - "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ strContent e}) - "outline" -> gets opmlSectionLevel >>= sect . (+1) - "?xml" -> return mempty - _ -> getBlocks e - where sect n = do headerText <- asHtml $ attrValue "text" e - noteBlocks <- asMarkdown $ attrValue "_note" e - modify $ \st -> st{ opmlSectionLevel = n } - bs <- getBlocks e - modify $ \st -> st{ opmlSectionLevel = n - 1 } - let headerText' = case map toUpper (attrValue "type" e) of - "LINK" -> link - (attrValue "url" e) "" headerText - _ -> headerText - return $ header n headerText' <> noteBlocks <> bs -parseBlock _ = return mempty diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs deleted file mode 100644 index ac22f2c09..000000000 --- a/src/Text/Pandoc/Readers/Odt.hs +++ /dev/null @@ -1,113 +0,0 @@ -{-# LANGUAGE PatternGuards #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Reader.Odt - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Entry point to the odt reader. --} - -module Text.Pandoc.Readers.Odt ( readOdt ) where - -import Codec.Archive.Zip -import qualified Text.XML.Light as XML - -import qualified Data.ByteString.Lazy as B - -import System.FilePath - -import Control.Monad.Except (throwError) - -import Text.Pandoc.Class (PandocMonad) -import qualified Text.Pandoc.Class as P -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.Options -import Text.Pandoc.MediaBag -import qualified Text.Pandoc.UTF8 as UTF8 - -import Text.Pandoc.Readers.Odt.ContentReader -import Text.Pandoc.Readers.Odt.StyleReader - -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Shared (filteredFilesFromArchive) - -readOdt :: PandocMonad m - => ReaderOptions - -> B.ByteString - -> m Pandoc -readOdt opts bytes = case readOdt' opts bytes of - Right (doc, mb) -> do - P.setMediaBag mb - return doc - Left e -> throwError e - --- -readOdt' :: ReaderOptions - -> B.ByteString - -> Either PandocError (Pandoc, MediaBag) -readOdt' _ bytes = bytesToOdt bytes-- of --- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) --- Left err -> Left err - --- -bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) -bytesToOdt bytes = case toArchiveOrFail bytes of - Right archive -> archiveToOdt archive - Left _ -> Left $ PandocParseError "Couldn't parse odt file." - --- -archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive - | Just contentEntry <- findEntryByPath "content.xml" archive - , Just stylesEntry <- findEntryByPath "styles.xml" archive - , Just contentElem <- entryToXmlElem contentEntry - , Just stylesElem <- entryToXmlElem stylesEntry - , Right styles <- chooseMax (readStylesAt stylesElem ) - (readStylesAt contentElem) - , media <- filteredFilesFromArchive archive filePathIsOdtMedia - , startState <- readerState styles media - , Right pandocWithMedia <- runConverter' read_body - startState - contentElem - - = Right pandocWithMedia - - | otherwise - -- Not very detailed, but I don't think more information would be helpful - = Left $ PandocParseError "Couldn't parse odt file." - where - filePathIsOdtMedia :: FilePath -> Bool - filePathIsOdtMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "Pictures/") - - --- -entryToXmlElem :: Entry -> Maybe XML.Element -entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs deleted file mode 100644 index b056f1ecc..000000000 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ /dev/null @@ -1,253 +0,0 @@ -{-# LANGUAGE Arrows #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE FlexibleInstances #-} -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Arrows.State - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -An arrow that transports a state. It is in essence a more powerful version of -the standard state monad. As it is such a simple extension, there are -other version out there that do exactly the same. -The implementation is duplicated, though, to add some useful features. -Most of these might be implemented without access to innards, but it's much -faster and easier to implement this way. --} - -module Text.Pandoc.Readers.Odt.Arrows.State where - -import Prelude hiding ( foldr, foldl ) - -import qualified Control.Category as Cat -import Control.Arrow -import Control.Monad - -import Data.Foldable -import Data.Monoid - -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible - - -newtype ArrowState state a b = ArrowState - { runArrowState :: (state, a) -> (state, b) } - --- | Constructor -withState :: (state -> a -> (state, b)) -> ArrowState state a b -withState = ArrowState . uncurry - --- | Constructor -withState' :: ((state, a) -> (state, b)) -> ArrowState state a b -withState' = ArrowState - --- | Constructor -modifyState :: (state -> state ) -> ArrowState state a a -modifyState = ArrowState . first - --- | Constructor -ignoringState :: ( a -> b ) -> ArrowState state a b -ignoringState = ArrowState . second - --- | Constructor -fromState :: (state -> (state, b)) -> ArrowState state a b -fromState = ArrowState . (.fst) - --- | Constructor -extractFromState :: (state -> b ) -> ArrowState state x b -extractFromState f = ArrowState $ \(state,_) -> (state, f state) - --- | Constructor -withUnchangedState :: (state -> a -> b ) -> ArrowState state a b -withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) - --- | Constructor -tryModifyState :: (state -> Either f state) - -> ArrowState state a (Either f a) -tryModifyState f = ArrowState $ \(state,a) - -> (state,).Left ||| (,Right a) $ f state - -instance Cat.Category (ArrowState s) where - id = ArrowState id - arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1) - -instance Arrow (ArrowState state) where - arr = ignoringState - first a = ArrowState $ \(s,(aF,aS)) - -> second (,aS) $ runArrowState a (s,aF) - second a = ArrowState $ \(s,(aF,aS)) - -> second (aF,) $ runArrowState a (s,aS) - -instance ArrowChoice (ArrowState state) where - left a = ArrowState $ \(s,e) -> case e of - Left l -> second Left $ runArrowState a (s,l) - Right r -> (s, Right r) - right a = ArrowState $ \(s,e) -> case e of - Left l -> (s, Left l) - Right r -> second Right $ runArrowState a (s,r) - -instance ArrowLoop (ArrowState state) where - loop a = ArrowState $ \(s, x) - -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) - in (s', x') - -instance ArrowApply (ArrowState state) where - app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) - - --- | Embedding of a state arrow in a state arrow with a different state type. -switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y -switchState there back a = ArrowState $ first there - >>> runArrowState a - >>> first back - --- | Lift a state arrow to modify the state of an arrow --- with a different state type. -liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x -liftToState unlift a = modifyState $ unlift &&& id - >>> runArrowState a - >>> snd - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like the identity arrow, --- save for side effects in the state. -withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x -withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' -withSubState' unlift a = ArrowState $ runArrowState unlift - >>> switch - >>> runArrowState a - >>> switch - where switch (x,y) = (y,x) - --- | Switches the type of the state temporarily. --- Drops the intermediate result state, behaving like a fallible --- identity arrow, save for side effects in the state. -withSubStateF :: ArrowState s x (Either f s') - -> ArrowState s' s (Either f s ) - -> ArrowState s x (Either f x ) -withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a) - >>^ spreadChoice - >>^ fmap fst - --- | Switches the type of the state temporarily. --- Returns the resulting sub-state. -withSubStateF' :: ArrowState s x (Either f s') - -> ArrowState s' s (Either f s ) - -> ArrowState s x (Either f s') -withSubStateF' unlift a = ArrowState go - where go p@(s,_) = tryRunning unlift - ( tryRunning a (second Right) ) - p - where tryRunning a' b v = case runArrowState a' v of - (_ , Left f) -> (s, Left f) - (x , Right y) -> b (y,x) - --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f - where a' x (s',m) = second (m <>) $ runArrowState a (s',x) - --- | Fold a state arrow through something 'Foldable'. Collect the results --- in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. -foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m -foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f - where a' (s',m) x = second (m <>) $ runArrowState a (s',x) - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldS' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. Collect the --- results in a 'Monoid'. --- Intermediate form of a fold between one with "only" a 'Monoid' --- and one with any function. --- If the iteration fails, the state will be reset to the initial one. -foldSL' :: (Foldable f, Monoid m) - => ArrowState s x (Either e m) - -> ArrowState s (f x) (Either e m) -foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'', Right (m <> m')) - (_ ,Left e ) -> (s , Left e) - a' _ e _ = e - --- | Fold a state arrow through something 'Foldable'. Collect the results in a --- 'MonadPlus'. -iterateS :: (Foldable f, MonadPlus m) - => ArrowState s x y - -> ArrowState s (f x) (m y) -iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f - where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x) - --- | Fold a state arrow through something 'Foldable'. Collect the results in a --- 'MonadPlus'. -iterateSL :: (Foldable f, MonadPlus m) - => ArrowState s x y - -> ArrowState s (f x) (m y) -iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f - where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x) - - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateS' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f - where a' s x (s',Right m) = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ _ e = e - --- | Fold a fallible state arrow through something 'Foldable'. --- Collect the results in a 'MonadPlus'. --- If the iteration fails, the state will be reset to the initial one. -iterateSL' :: (Foldable f, MonadPlus m) - => ArrowState s x (Either e y ) - -> ArrowState s (f x) (Either e (m y)) -iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f - where a' s (s',Right m) x = case runArrowState a (s',x) of - (s'',Right m') -> (s'',Right $ mplus m $ return m') - (_ ,Left e ) -> (s ,Left e ) - a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs deleted file mode 100644 index 218a85661..000000000 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ /dev/null @@ -1,495 +0,0 @@ -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Arrows.Utils - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Utility functions for Arrows (Kleisli monads). - -Some general notes on notation: - -* "^" is meant to stand for a pure function that is lifted into an arrow -based on its usage for that purpose in "Control.Arrow". -* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function -with an equivalent return value. -* "_" stands for the dropping of a value. --} - --- We export everything -module Text.Pandoc.Readers.Odt.Arrows.Utils where - -import Control.Arrow -import Control.Monad ( join, MonadPlus(..) ) - -import qualified Data.Foldable as F -import Data.Monoid - -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils - -and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') -and2 = (&&&) - -and3 :: (Arrow a) - => a b c0->a b c1->a b c2 - -> a b (c0,c1,c2 ) -and4 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3 - -> a b (c0,c1,c2,c3 ) -and5 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4 - -> a b (c0,c1,c2,c3,c4 ) -and6 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 - -> a b (c0,c1,c2,c3,c4,c5 ) -and7 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 - -> a b (c0,c1,c2,c3,c4,c5,c6 ) -and8 :: (Arrow a) - => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 - -> a b (c0,c1,c2,c3,c4,c5,c6,c7) - -and3 a b c = (and2 a b ) &&& c - >>^ \((z,y ) , x) -> (z,y,x ) -and4 a b c d = (and3 a b c ) &&& d - >>^ \((z,y,x ) , w) -> (z,y,x,w ) -and5 a b c d e = (and4 a b c d ) &&& e - >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) -and6 a b c d e f = (and5 a b c d e ) &&& f - >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) -and7 a b c d e f g = (and6 a b c d e f ) &&& g - >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) -and8 a b c d e f g h = (and7 a b c d e f g) &&& h - >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) - -liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z -liftA2 f a b = a &&& b >>^ uncurry f - -liftA3 :: (Arrow a) => (z->y->x -> r) - -> a b z->a b y->a b x - -> a b r -liftA4 :: (Arrow a) => (z->y->x->w -> r) - -> a b z->a b y->a b x->a b w - -> a b r -liftA5 :: (Arrow a) => (z->y->x->w->v -> r) - -> a b z->a b y->a b x->a b w->a b v - -> a b r -liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u - -> a b r -liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t - -> a b r -liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) - -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s - -> a b r - -liftA3 fun a b c = and3 a b c >>^ uncurry3 fun -liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun -liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun -liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun -liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun -liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun - -liftA :: (Arrow a) => (y -> z) -> a b y -> a b z -liftA fun a = a >>^ fun - - --- | Duplicate a value to subsequently feed it into different arrows. --- Can almost always be replaced with '(&&&)', 'keepingTheValue', --- or even '(|||)'. --- Aequivalent to --- > returnA &&& returnA -duplicate :: (Arrow a) => a b (b,b) -duplicate = arr $ join (,) - --- | Lifts the combination of two values into an arrow. -joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z -joinOn = arr.uncurry - --- | Applies a function to the uncurried result-pair of an arrow-application. --- (The %-symbol was chosen to evoke an association with pairs.) -(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>% f = a >>^ uncurry f - --- | '(>>%)' with its arguments flipped -(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(%<<) = flip (>>%) - --- | Precomposition with an uncurried function -(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f %>> a = uncurry f ^>> a - --- | Precomposition with an uncurried function (right to left variant) -(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<%) = flip (%>>) - -infixr 2 >>%, %<<, %>>, <<% - - --- | Duplicate a value and apply an arrow to the second instance. --- Aequivalent to --- > \a -> duplicate >>> second a --- or --- > \a -> returnA &&& a -keepingTheValue :: (Arrow a) => a b c -> a b (b,c) -keepingTheValue a = returnA &&& a - --- | Duplicate a value and apply an arrow to the first instance. --- Aequivalent to --- > \a -> duplicate >>> first a --- or --- > \a -> a &&& returnA -keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) -keepingTheValue' a = a &&& returnA - --- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. --- Actually, it's the more complex '(>=>)', because 'bind' alone does not --- combine as nicely in arrow form. --- The current implementation is not the most efficient one, because it can --- not return directly if a 'Nothing' is encountered. That in turn follows --- from the type system, as 'Nothing' has an "invisible" type parameter that --- can not be dropped early. --- --- Also, there probably is a way to generalize this to other monads --- or applicatives, but I'm leaving that as an exercise to the reader. --- I have a feeling there is a new Arrow-typeclass to be found that is less --- restrictive than 'ArrowApply'. If it is already out there, --- I have not seen it yet. ('ArrowPlus' for example is not general enough.) -(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) -a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join - -infixr 2 >>>= - --- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. --- (But still different from a true bind) -(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) -(>++<) = liftA2 mplus - --- | Left-compose with a pure function -leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) -leftLift = left.arr - --- | Right-compose with a pure function -rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') -rightLift = right.arr - - -( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') -( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') -( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') - -l ^+++ r = leftLift l >>> right r -l +++^ r = left l >>> rightLift r -l ^+++^ r = leftLift l >>> rightLift r - -infixr 2 ^+++, +++^, ^+++^ - -( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d -( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d -( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d - -l ^||| r = arr l ||| r -l |||^ r = l ||| arr r -l ^|||^ r = arr l ||| arr r - -infixr 2 ^||| , |||^, ^|||^ - -( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') -( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') -( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') - -l ^&&& r = arr l &&& r -l &&&^ r = l &&& arr r -l ^&&&^ r = arr l &&& arr r - -infixr 3 ^&&&, &&&^, ^&&&^ - -( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') -( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') -( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') - -l ^*** r = arr l *** r -l ***^ r = l *** arr r -l ^***^ r = arr l *** arr r - -infixr 3 ^***, ***^, ^***^ - --- | A version of --- --- >>> \p -> arr (\x -> if p x the Right x else Left x) --- --- but with p being an arrow -choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) -choose checkValue = keepingTheValue checkValue >>^ select - where select (x,True ) = Right x - select (x,False ) = Left x - --- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. -choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) -choiceToMaybe = arr eitherToMaybe - --- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@. -maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b) -maybeToChoice = arr maybeToEither - --- | Lifts a constant value into an arrow -returnV :: (Arrow a) => c -> a x c -returnV = arr.const - --- | 'returnA' dropping everything -returnA_ :: (Arrow a) => a _b () -returnA_ = returnV () - --- | Wrapper for an arrow that can be evaluated im parallel. All --- Arrows can be evaluated in parallel, as long as they return a --- monoid. -newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } - deriving (Eq, Ord, Show) - -instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where - mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend - --- | Evaluates a collection of arrows in a parallel fashion. --- --- This is in essence a fold of '(&&&)' over the collection, --- so the actual execution order and parallelity depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- --- This function can be seen as a generalization of --- 'Control.Applicative.sequenceA' to arrows or as an alternative to --- a fold with 'Control.Applicative.WrappedArrow', which --- substitutes the monoid with function application. --- -coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m -coEval = evalParallelArrow . (F.foldMap CoEval) - --- | Defines Left as failure, Right as success -type FallibleArrow a input failure success = a input (Either failure success) - -type ReFallibleArrow a failure success success' - = FallibleArrow a (Either failure success) failure success' - --- | Wrapper for fallible arrows. Fallible arrows are all arrows that return --- an Either value where left is a faliure and right is a success value. -newtype AlternativeArrow a input failure success - = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } - - -instance (ArrowChoice a, Monoid failure) - => Monoid (AlternativeArrow a input failure success) where - mempty = TryArrow $ returnV $ Left mempty - (TryArrow a) `mappend` (TryArrow b) - = TryArrow $ a &&& b - >>^ \(a',~b') - -> ( (\a'' -> left (mappend a'') b') ||| Right ) - a' - --- | Evaluates a collection of fallible arrows, trying each one in succession. --- Left values are interpreted as failures, right values as successes. --- --- The evaluation is stopped once an arrow succeeds. --- Up to that point, all failures are collected in the failure-monoid. --- Note that '()' is a monoid, and thus can serve as a failure-collector if --- you are uninterested in the exact failures. --- --- This is in essence a fold of '(&&&)' over the collection, enhanced with a --- little bit of repackaging, so the actual execution order depends on the --- implementation of '(&&&)' in the arrow in question. --- The default implementation of '(&&&)' for example keeps the --- order as given in the collection. --- -tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) - => f (FallibleArrow a b failure success) - -> FallibleArrow a b failure success -tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) - --- -liftSuccess :: (ArrowChoice a) - => (success -> success') - -> ReFallibleArrow a failure success success' -liftSuccess = rightLift - --- -liftAsSuccess :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -liftAsSuccess a = a >>^ Right - --- -asFallibleArrow :: (ArrowChoice a) - => a x success - -> FallibleArrow a x failure success -asFallibleArrow a = a >>^ Right - --- | Raises an error into a 'ReFallibleArrow' if the arrow is already in --- "error mode" -liftError :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -liftError e = leftLift (e <>) - --- | Raises an error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseA :: (ArrowChoice a) - => failure - -> FallibleArrow a x failure success -_raiseA e = returnV (Left e) - --- | Raises an empty error into a 'FallibleArrow', droping both the arrow input --- and any previously stored error value. -_raiseAEmpty :: (ArrowChoice a, Monoid failure) - => FallibleArrow a x failure success -_raiseAEmpty = _raiseA mempty - --- | Raises an error into a 'ReFallibleArrow', possibly appending the new error --- to an existing one -raiseA :: (ArrowChoice a, Monoid failure) - => failure - -> ReFallibleArrow a failure success success -raiseA e = arr $ Left.(either (<> e) (const e)) - --- | Raises an empty error into a 'ReFallibleArrow'. If there already is an --- error, nothing changes. --- (Note that this function is only aequivalent to @raiseA mempty@ iff the --- failure monoid follows the monoid laws.) -raiseAEmpty :: (ArrowChoice a, Monoid failure) - => ReFallibleArrow a failure success success -raiseAEmpty = arr (fromRight (const mempty) >>> Left) - - --- | Execute the second arrow if the first succeeds -(>>?) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> FallibleArrow a success failure success' - -> FallibleArrow a x failure success' -a >>? b = a >>> Left ^||| b - --- | Execute the lifted second arrow if the first succeeds -(>>?^) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> (success -> success') - -> FallibleArrow a x failure success' -a >>?^ f = a >>^ Left ^|||^ Right . f - --- | Execute the lifted second arrow if the first succeeds -(>>?^?) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a >>?^? b = a >>> Left ^|||^ b - --- | Execute the second arrow if the lifted first arrow succeeds -(^>>?) :: (ArrowChoice a) - => (x -> Either failure success) - -> FallibleArrow a success failure success' - -> FallibleArrow a x failure success' -a ^>>? b = a ^>> Left ^||| b - --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> success') - -> FallibleArrow a x failure success' -a ^>>?^ f = arr $ a >>> right f - --- | Execute the lifted second arrow if the lifted first arrow succeeds -(^>>?^?) :: (ArrowChoice a) - => (x -> Either failure success) - -> (success -> Either failure success') - -> FallibleArrow a x failure success' -a ^>>?^? f = a ^>> Left ^|||^ f - --- | Execute the second, non-fallible arrow if the first arrow succeeds -(>>?!) :: (ArrowChoice a) - => FallibleArrow a x failure success - -> a success success' - -> FallibleArrow a x failure success' -a >>?! f = a >>> right f - ---- -(>>?%) :: (ArrowChoice a) - => FallibleArrow a x f (b,b') - -> (b -> b' -> c) - -> FallibleArrow a x f c -a >>?% f = a >>?^ (uncurry f) - ---- -(^>>?%) :: (ArrowChoice a) - => (x -> Either f (b,b')) - -> (b -> b' -> c) - -> FallibleArrow a x f c -a ^>>?% f = arr a >>?^ (uncurry f) - ---- -(>>?%?) :: (ArrowChoice a) - => FallibleArrow a x f (b,b') - -> (b -> b' -> (Either f c)) - -> FallibleArrow a x f c -a >>?%? f = a >>?^? (uncurry f) - -infixr 1 >>?, >>?^, >>?^? -infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! -infixr 1 >>?%, ^>>?%, >>?%? - --- | Keep values that are Right, replace Left values by a constant. -ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v -ifFailedUse v = arr $ either (const v) id - --- | '(&&)' lifted into an arrow -(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<&&>) = liftA2 (&&) - --- | '(||)' lifted into an arrow -(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool -(<||>) = liftA2 (||) - --- | An equivalent of '(&&)' in a fallible arrow -(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s - -> FallibleArrow a x f s' - -> FallibleArrow a x f (s,s') -(>&&<) = liftA2 chooseMin - --- | An equivalent of '(||)' in some forms of fallible arrows -(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s - -> FallibleArrow a x f s - -> FallibleArrow a x f s -(>||<) = liftA2 chooseMax - --- | An arrow version of a short-circuit (<|>) -ifFailedDo :: (ArrowChoice a) - => FallibleArrow a x f y - -> FallibleArrow a x f y - -> FallibleArrow a x f y -ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) - where repackage (x , Left _) = Left x - repackage (_ , Right y) = Right y - -infixr 4 <&&>, <||>, >&&<, >||< -infixr 1 `ifFailedDo` - - diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs deleted file mode 100644 index 1f095bade..000000000 --- a/src/Text/Pandoc/Readers/Odt/Base.hs +++ /dev/null @@ -1,43 +0,0 @@ -{-# LANGUAGE PatternGuards #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Base - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Core types of the odt reader. --} - -module Text.Pandoc.Readers.Odt.Base where - -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Namespaces - -type OdtConverterState s = XMLConverterState Namespace s - -type XMLReader s a b = FallibleXMLConverter Namespace s a b - -type XMLReaderSafe s a b = XMLConverter Namespace s a b - diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs deleted file mode 100644 index a1bd8cb59..000000000 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ /dev/null @@ -1,929 +0,0 @@ -{-# LANGUAGE Arrows #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE RecordWildCards #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.ContentReader - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -The core of the odt reader that converts odt features into Pandoc types. --} - -module Text.Pandoc.Readers.Odt.ContentReader -( readerState -, read_body -) where - -import Control.Arrow -import Control.Applicative hiding ( liftA, liftA2, liftA3 ) - -import qualified Data.ByteString.Lazy as B -import qualified Data.Map as M -import Data.List ( find, intercalate ) -import Data.Maybe - -import qualified Text.XML.Light as XML - -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Text.Pandoc.Shared - -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.StyleReader - -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils - -import qualified Data.Set as Set - --------------------------------------------------------------------------------- --- State --------------------------------------------------------------------------------- - -type Anchor = String -type Media = [(FilePath, B.ByteString)] - -data ReaderState - = ReaderState { -- | A collection of styles read somewhere else. - -- It is only queried here, not modified. - styleSet :: Styles - -- | A stack of the styles of parent elements. - -- Used to look up inherited style properties. - , styleTrace :: [Style] - -- | Keeps track of the current depth in nested lists - , currentListLevel :: ListLevel - -- | Lists may provide their own style, but they don't have - -- to. If they do not, the style of a parent list may be used - -- or even a default list style from the paragraph style. - -- This value keeps track of the closest list style there - -- currently is. - , currentListStyle :: Maybe ListStyle - -- | A map from internal anchor names to "pretty" ones. - -- The mapping is a purely cosmetic one. - , bookmarkAnchors :: M.Map Anchor Anchor - -- | A map of files / binary data from the archive - , envMedia :: Media - -- | Hold binary resources used in the document - , odtMediaBag :: MediaBag --- , sequences --- , trackedChangeIDs - } - deriving ( Show ) - -readerState :: Styles -> Media -> ReaderState -readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty - --- -pushStyle' :: Style -> ReaderState -> ReaderState -pushStyle' style state = state { styleTrace = style : styleTrace state } - --- -popStyle' :: ReaderState -> ReaderState -popStyle' state = case styleTrace state of - _:trace -> state { styleTrace = trace } - _ -> state - --- -modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState) -modifyListLevel f state = state { currentListLevel = f (currentListLevel state) } - --- -shiftListLevel :: ListLevel -> (ReaderState -> ReaderState) -shiftListLevel diff = modifyListLevel (+ diff) - --- -swapCurrentListStyle :: Maybe ListStyle -> ReaderState - -> (ReaderState, Maybe ListStyle) -swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle } - , currentListStyle state - ) - --- -lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor -lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors - --- -putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState -putPrettyAnchor ugly pretty state@ReaderState{..} - = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors } - --- -usedAnchors :: ReaderState -> [Anchor] -usedAnchors ReaderState{..} = M.elems bookmarkAnchors - -getMediaBag :: ReaderState -> MediaBag -getMediaBag ReaderState{..} = odtMediaBag - -getMediaEnv :: ReaderState -> Media -getMediaEnv ReaderState{..} = envMedia - -insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState -insertMedia' (fp, bs) state@ReaderState{..} - = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag } - --------------------------------------------------------------------------------- --- Reader type and associated tools --------------------------------------------------------------------------------- - -type OdtReader a b = XMLReader ReaderState a b - -type OdtReaderSafe a b = XMLReaderSafe ReaderState a b - --- | Extract something from the styles -fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b -fromStyles f = keepingTheValue - (getExtraState >>^ styleSet) - >>% f - --- -getStyleByName :: OdtReader StyleName Style -getStyleByName = fromStyles lookupStyle >>^ maybeToChoice - --- -findStyleFamily :: OdtReader Style StyleFamily -findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice - --- -lookupListStyle :: OdtReader StyleName ListStyle -lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice - --- -switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) -switchCurrentListStyle = keepingTheValue getExtraState - >>% swapCurrentListStyle - >>> first setExtraState - >>^ snd - --- -pushStyle :: OdtReaderSafe Style Style -pushStyle = keepingTheValue ( - ( keepingTheValue getExtraState - >>% pushStyle' - ) - >>> setExtraState - ) - >>^ fst - --- -popStyle :: OdtReaderSafe x x -popStyle = keepingTheValue ( - getExtraState - >>> arr popStyle' - >>> setExtraState - ) - >>^ fst - --- -getCurrentListLevel :: OdtReaderSafe _x ListLevel -getCurrentListLevel = getExtraState >>^ currentListLevel - --- -updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) -updateMediaWithResource = keepingTheValue ( - (keepingTheValue getExtraState - >>% insertMedia' - ) - >>> setExtraState - ) - >>^ fst - -lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) -lookupResource = proc target -> do - state <- getExtraState -< () - case lookup target (getMediaEnv state) of - Just bs -> returnV (target, bs) -<< () - Nothing -> returnV ("", B.empty) -< () - -type AnchorPrefix = String - --- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a --- unique identifier but without assuming that the id should be for a header. --- Second argument is a list of already used identifiers. -uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor -uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `elem` usedIdents - then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of - Just x -> numIdent x - Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent - --- | First argument: basis for a new "pretty" anchor if none exists yet --- Second argument: a key ("ugly" anchor) --- Returns: saved "pretty" anchor or created new one -getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor -getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do - state <- getExtraState -< () - case lookupPrettyAnchor uglyAnchor state of - Just prettyAnchor -> returnA -< prettyAnchor - Nothing -> do - let newPretty = uniqueIdentFrom baseIdent (usedAnchors state) - modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty - --- | Input: basis for a new header anchor --- Ouput: saved new anchor -getHeaderAnchor :: OdtReaderSafe Inlines Anchor -getHeaderAnchor = proc title -> do - state <- getExtraState -< () - let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state) - modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor - - --------------------------------------------------------------------------------- --- Working with styles --------------------------------------------------------------------------------- - --- -readStyleByName :: OdtReader _x (StyleName, Style) -readStyleByName = - findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE - where - liftE :: (StyleName, Fallible Style) -> Fallible (StyleName, Style) - liftE (name, Right v) = Right (name, v) - liftE (_, Left v) = Left v - --- -isStyleToTrace :: OdtReader Style Bool -isStyleToTrace = findStyleFamily >>?^ (==FaText) - --- -withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines -withNewStyle a = proc x -> do - fStyle <- readStyleByName -< () - case fStyle of - Right (styleName, _) | isCodeStyle styleName -> do - inlines <- a -< x - arr inlineCode -<< inlines - Right (_, style) -> do - mFamily <- arr styleFamily -< style - fTextProps <- arr ( maybeToChoice - . textProperties - . styleProperties - ) -< style - case fTextProps of - Right textProps -> do - state <- getExtraState -< () - let triple = (state, textProps, mFamily) - modifier <- arr modifierFromStyleDiff -< triple - fShouldTrace <- isStyleToTrace -< style - case fShouldTrace of - Right shouldTrace -> do - if shouldTrace - then do - pushStyle -< style - inlines <- a -< x - popStyle -< () - arr modifier -<< inlines - else - -- In case anything goes wrong - a -< x - Left _ -> a -< x - Left _ -> a -< x - Left _ -> a -< x - where - isCodeStyle :: StyleName -> Bool - isCodeStyle "Source_Text" = True - isCodeStyle _ = False - - inlineCode :: Inlines -> Inlines - inlineCode = code . intercalate "" . map stringify . toList - -type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) -type InlineModifier = Inlines -> Inlines - --- | Given data about the local style changes, calculates how to modify --- an instance of 'Inlines' -modifierFromStyleDiff :: PropertyTriple -> InlineModifier -modifierFromStyleDiff propertyTriple = - composition $ - (getVPosModifier propertyTriple) - : map (first ($ propertyTriple) >>> ifThen_else ignore) - [ (hasEmphChanged , emph ) - , (hasChanged isStrong , strong ) - , (hasChanged strikethrough , strikeout ) - ] - where - ifThen_else else' (if',then') = if if' then then' else else' - - ignore = id :: InlineModifier - - getVPosModifier :: PropertyTriple -> InlineModifier - getVPosModifier triple@(_,textProps,_) = - let getVPos = Just . verticalPosition - in case lookupPreviousValueM getVPos triple of - Nothing -> ignore - Just oldVPos -> getVPosModifier' (oldVPos, verticalPosition textProps) - - getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore - getVPosModifier' ( _ , VPosSub ) = subscript - getVPosModifier' ( _ , VPosSuper ) = superscript - getVPosModifier' ( _ , _ ) = ignore - - hasEmphChanged :: PropertyTriple -> Bool - hasEmphChanged = swing any [ hasChanged isEmphasised - , hasChangedM pitch - , hasChanged underline - ] - - hasChanged property triple@(_, property -> newProperty, _) = - maybe True (/=newProperty) (lookupPreviousValue property triple) - - hasChangedM property triple@(_, textProps,_) = - fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple - - lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties) - - lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties) - - lookupPreviousStyleValue f (ReaderState{..},_,mFamily) - = ( findBy f $ extendedStylePropertyChain styleTrace styleSet ) - <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily ) - - -type ParaModifier = Blocks -> Blocks - -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5 -_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5 - --- | Returns either 'id' or 'blockQuote' depending on the current indentation -getParaModifier :: Style -> ParaModifier -getParaModifier Style{..} | Just props <- paraProperties styleProperties - , isBlockQuote (indentation props) - (margin_left props) - = blockQuote - | otherwise - = id - where - isBlockQuote mIndent mMargin - | LengthValueMM indent <- mIndent - , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - = True - | LengthValueMM margin <- mMargin - , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - = True - | LengthValueMM indent <- mIndent - , LengthValueMM margin <- mMargin - = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ - - | PercentValue indent <- mIndent - , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - = True - | PercentValue margin <- mMargin - , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - = True - | PercentValue indent <- mIndent - , PercentValue margin <- mMargin - = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ - - | otherwise - = False - --- -constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks -constructPara reader = proc blocks -> do - fStyle <- readStyleByName -< blocks - case fStyle of - Left _ -> reader -< blocks - Right (styleName, _) | isTableCaptionStyle styleName -> do - blocks' <- reader -< blocks - arr tableCaptionP -< blocks' - Right (_, style) -> do - let modifier = getParaModifier style - blocks' <- reader -< blocks - arr modifier -<< blocks' - where - isTableCaptionStyle :: StyleName -> Bool - isTableCaptionStyle "Table" = True - isTableCaptionStyle _ = False - tableCaptionP b = divWith ("", ["caption"], []) b - -type ListConstructor = [Blocks] -> Blocks - -getListConstructor :: ListLevelStyle -> ListConstructor -getListConstructor ListLevelStyle{..} = - case listLevelType of - LltBullet -> bulletList - LltImage -> bulletList - LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat - listNumberDelim = toListNumberDelim listItemPrefix - listItemSuffix - in orderedListWith (listItemStart, listNumberStyle, listNumberDelim) - where - toListNumberStyle LinfNone = DefaultStyle - toListNumberStyle LinfNumber = Decimal - toListNumberStyle LinfRomanLC = LowerRoman - toListNumberStyle LinfRomanUC = UpperRoman - toListNumberStyle LinfAlphaLC = LowerAlpha - toListNumberStyle LinfAlphaUC = UpperAlpha - toListNumberStyle (LinfString _) = Example - - toListNumberDelim Nothing (Just ".") = Period - toListNumberDelim (Just "" ) (Just ".") = Period - toListNumberDelim Nothing (Just ")") = OneParen - toListNumberDelim (Just "" ) (Just ")") = OneParen - toListNumberDelim (Just "(") (Just ")") = TwoParens - toListNumberDelim _ _ = DefaultDelim - - --- | Determines which style to use for a list, which level to use of that --- style, and which type of list to create as a result of this information. --- Then prepares the state for eventual child lists and constructs the list from --- the results. --- Two main cases are handled: The list may provide its own style or it may --- rely on a parent list's style. I the former case the current style in the --- state must be switched before and after the call to the child converter --- while in the latter the child converter can be called directly. --- If anything goes wrong, a default ordered-list-constructor is used. -constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks -constructList reader = proc x -> do - modifyExtraState (shiftListLevel 1) -< () - listLevel <- getCurrentListLevel -< () - fStyleName <- findAttr NsText "style-name" -< () - case fStyleName of - Right styleName -> do - fListStyle <- lookupListStyle -< styleName - case fListStyle of - Right listStyle -> do - fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) - case fLLS of - Just listLevelStyle -> do - oldListStyle <- switchCurrentListStyle -< Just listStyle - blocks <- constructListWith listLevelStyle -<< x - switchCurrentListStyle -< oldListStyle - returnA -< blocks - Nothing -> constructOrderedList -< x - Left _ -> constructOrderedList -< x - Left _ -> do - state <- getExtraState -< () - mListStyle <- arr currentListStyle -< state - case mListStyle of - Just listStyle -> do - fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) - case fLLS of - Just listLevelStyle -> constructListWith listLevelStyle -<< x - Nothing -> constructOrderedList -< x - Nothing -> constructOrderedList -< x - where - constructOrderedList = - reader - >>> modifyExtraState (shiftListLevel (-1)) - >>^ orderedList - constructListWith listLevelStyle = - reader - >>> getListConstructor listLevelStyle - ^>> modifyExtraState (shiftListLevel (-1)) - --------------------------------------------------------------------------------- --- Readers --------------------------------------------------------------------------------- - -type ElementMatcher result = (Namespace, ElementName, OdtReader result result) - -type InlineMatcher = ElementMatcher Inlines - -type BlockMatcher = ElementMatcher Blocks - - --- -matchingElement :: (Monoid e) - => Namespace -> ElementName - -> OdtReaderSafe e e - -> ElementMatcher e -matchingElement ns name reader = (ns, name, asResultAccumulator reader) - where - asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>) - --- -matchChildContent' :: (Monoid result) - => [ElementMatcher result] - -> OdtReaderSafe _x result -matchChildContent' ls = returnV mempty >>> matchContent' ls - --- -matchChildContent :: (Monoid result) - => [ElementMatcher result] - -> OdtReaderSafe (result, XML.Content) result - -> OdtReaderSafe _x result -matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - - --------------------------------------------- --- Matchers --------------------------------------------- - ----------------------- --- Basics ----------------------- - --- --- | Open Document allows several consecutive spaces if they are marked up -read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>% recover - where - -- fallible version - read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines - read_plain_text' = ( second ( arr extractText ) - >>^ spreadChoice >>?! second text - ) - >>?% (<>) - -- - extractText :: XML.Content -> Fallible String - extractText (XML.Text cData) = succeedWith (XML.cdData cData) - extractText _ = failEmpty - -read_text_seq :: InlineMatcher -read_text_seq = matchingElement NsText "sequence" - $ matchChildContent [] read_plain_text - - --- specifically. I honor that, although the current implementation of '(<>)' --- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein. --- The rational is to be prepared for future modifications. -read_spaces :: InlineMatcher -read_spaces = matchingElement NsText "s" ( - readAttrWithDefault NsText "c" 1 -- how many spaces? - >>^ fromList.(`replicate` Space) - ) --- -read_line_break :: InlineMatcher -read_line_break = matchingElement NsText "line-break" - $ returnV linebreak - --- -read_span :: InlineMatcher -read_span = matchingElement NsText "span" - $ withNewStyle - $ matchChildContent [ read_span - , read_spaces - , read_line_break - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - ] read_plain_text - --- -read_paragraph :: BlockMatcher -read_paragraph = matchingElement NsText "p" - $ constructPara - $ liftA para - $ withNewStyle - $ matchChildContent [ read_span - , read_spaces - , read_line_break - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - , read_maybe_nested_img_frame - , read_text_seq - ] read_plain_text - - ----------------------- --- Headers ----------------------- - --- -read_header :: BlockMatcher -read_header = matchingElement NsText "h" - $ proc blocks -> do - level <- ( readAttrWithDefault NsText "outline-level" 1 - ) -< blocks - children <- ( matchChildContent [ read_span - , read_spaces - , read_line_break - , read_link - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - , read_maybe_nested_img_frame - ] read_plain_text - ) -< blocks - anchor <- getHeaderAnchor -< children - let idAttr = (anchor, [], []) -- no classes, no key-value pairs - arr (uncurry3 headerWith) -< (idAttr, level, children) - ----------------------- --- Lists ----------------------- - --- -read_list :: BlockMatcher -read_list = matchingElement NsText "list" --- $ withIncreasedListLevel - $ constructList --- $ liftA bulletList - $ matchChildContent' [ read_list_item - ] --- -read_list_item :: ElementMatcher [Blocks] -read_list_item = matchingElement NsText "list-item" - $ liftA (compactify.(:[])) - ( matchChildContent' [ read_paragraph - , read_header - , read_list - ] - ) - - ----------------------- --- Links ----------------------- - -read_link :: InlineMatcher -read_link = matchingElement NsText "a" - $ liftA3 link - ( findAttrWithDefault NsXLink "href" "" ) - ( findAttrWithDefault NsOffice "title" "" ) - ( matchChildContent [ read_span - , read_note - , read_citation - , read_bookmark - , read_bookmark_start - , read_reference_start - , read_bookmark_ref - , read_reference_ref - ] read_plain_text ) - - -------------------------- --- Footnotes -------------------------- - -read_note :: InlineMatcher -read_note = matchingElement NsText "note" - $ liftA note - $ matchChildContent' [ read_note_body ] - -read_note_body :: BlockMatcher -read_note_body = matchingElement NsText "note-body" - $ matchChildContent' [ read_paragraph ] - -------------------------- --- Citations -------------------------- - -read_citation :: InlineMatcher -read_citation = matchingElement NsText "bibliography-mark" - $ liftA2 cite - ( liftA2 makeCitation - ( findAttrWithDefault NsText "identifier" "" ) - ( readAttrWithDefault NsText "number" 0 ) - ) - ( matchChildContent [] read_plain_text ) - where - makeCitation :: String -> Int -> [Citation] - makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] - - ----------------------- --- Tables ----------------------- - --- -read_table :: BlockMatcher -read_table = matchingElement NsTable "table" - $ liftA simpleTable' - $ matchChildContent' [ read_table_row - ] - --- | A simple table without a caption or headers --- | Infers the number of headers from rows -simpleTable' :: [[Blocks]] -> Blocks -simpleTable' [] = simpleTable [] [] -simpleTable' (x : rest) = simpleTable (fmap (const defaults) x) (x : rest) - where defaults = fromList [] - --- -read_table_row :: ElementMatcher [[Blocks]] -read_table_row = matchingElement NsTable "table-row" - $ liftA (:[]) - $ matchChildContent' [ read_table_cell - ] - --- -read_table_cell :: ElementMatcher [Blocks] -read_table_cell = matchingElement NsTable "table-cell" - $ liftA (compactify.(:[])) - $ matchChildContent' [ read_paragraph - ] - ----------------------- --- Images ----------------------- - --- -read_maybe_nested_img_frame :: InlineMatcher -read_maybe_nested_img_frame = matchingElement NsDraw "frame" - $ proc blocks -> do - img <- (findChild' NsDraw "image") -< () - case img of - Just _ -> read_frame -< blocks - Nothing -> matchChildContent' [ read_frame_text_box ] -< blocks - -read_frame :: OdtReaderSafe Inlines Inlines -read_frame = - proc blocks -> do - w <- ( findAttr' NsSVG "width" ) -< () - h <- ( findAttr' NsSVG "height" ) -< () - titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks - src <- matchChildContent' [ read_image_src ] -< blocks - resource <- lookupResource -< src - _ <- updateMediaWithResource -< resource - alt <- (matchChildContent [] read_plain_text) -< blocks - arr (uncurry4 imageWith ) -< - (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt) - -image_attributes :: Maybe String -> Maybe String -> Attr -image_attributes x y = - ( "", [], (dim "width" x) ++ (dim "height" y)) - where - dim _ (Just "") = [] - dim name (Just v) = [(name, v)] - dim _ Nothing = [] - -read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor) -read_image_src = matchingElement NsDraw "image" - $ proc _ -> do - imgSrc <- findAttr NsXLink "href" -< () - case imgSrc of - Right src -> returnV src -<< () - Left _ -> returnV "" -< () - -read_frame_title :: InlineMatcher -read_frame_title = matchingElement NsSVG "title" - $ (matchChildContent [] read_plain_text) - -read_frame_text_box :: InlineMatcher -read_frame_text_box = matchingElement NsDraw "text-box" - $ proc blocks -> do - paragraphs <- (matchChildContent' [ read_paragraph ]) -< blocks - arr read_img_with_caption -< toList paragraphs - -read_img_with_caption :: [Block] -> Inlines -read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) = - singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption -read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) = - singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows -read_img_with_caption ( (Para (_ : xs)) : ys) = - read_img_with_caption ((Para xs) : ys) -read_img_with_caption _ = - mempty - ----------------------- --- Internal links ----------------------- - -_ANCHOR_PREFIX_ :: String -_ANCHOR_PREFIX_ = "anchor" - --- -readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttr NsText "name" - --- | Beware: may fail -findAnchorName :: OdtReader AnchorPrefix Anchor -findAnchorName = ( keepingTheValue readAnchorAttr - >>^ spreadChoice - ) >>?! getPrettyAnchor - - --- -maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix - -> OdtReaderSafe Inlines Inlines -maybeAddAnchorFrom anchorReader = - keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) - >>> - proc (inlines, fAnchorElem) -> do - case fAnchorElem of - Right anchorElem -> returnA -< anchorElem - Left _ -> returnA -< inlines - where - toAnchorElem :: Anchor -> Inlines - toAnchorElem anchorID = spanWith (anchorID, [], []) mempty - -- no classes, no key-value pairs - --- -read_bookmark :: InlineMatcher -read_bookmark = matchingElement NsText "bookmark" - $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) - --- -read_bookmark_start :: InlineMatcher -read_bookmark_start = matchingElement NsText "bookmark-start" - $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) - --- -read_reference_start :: InlineMatcher -read_reference_start = matchingElement NsText "reference-mark-start" - $ maybeAddAnchorFrom readAnchorAttr - --- | Beware: may fail -findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttr NsText "ref-name" - >>?^ (_ANCHOR_PREFIX_,) - ) >>?! getPrettyAnchor - - --- -maybeInAnchorRef :: OdtReaderSafe Inlines Inlines -maybeInAnchorRef = proc inlines -> do - fRef <- findAnchorRef -< () - case fRef of - Right anchor -> - arr (toAnchorRef anchor) -<< inlines - Left _ -> returnA -< inlines - where - toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ('#':anchor) "" -- no title - --- -read_bookmark_ref :: InlineMatcher -read_bookmark_ref = matchingElement NsText "bookmark-ref" - $ maybeInAnchorRef - <<< matchChildContent [] read_plain_text - --- -read_reference_ref :: InlineMatcher -read_reference_ref = matchingElement NsText "reference-ref" - $ maybeInAnchorRef - <<< matchChildContent [] read_plain_text - - ----------------------- --- Entry point ----------------------- - ---read_plain_content :: OdtReaderSafe _x Inlines ---read_plain_content = strContent >>^ text - -read_text :: OdtReaderSafe _x Pandoc -read_text = matchChildContent' [ read_header - , read_paragraph - , read_list - , read_table - ] - >>^ doc - -post_process :: Pandoc -> Pandoc -post_process (Pandoc m blocks) = - Pandoc m (post_process' blocks) - -post_process' :: [Block] -> [Block] -post_process' ((Table _ a w h r) : (Div ("", ["caption"], _) [Para inlines] ) : xs) = - (Table inlines a w h r) : ( post_process' xs ) -post_process' bs = bs - -read_body :: OdtReader _x (Pandoc, MediaBag) -read_body = executeIn NsOffice "body" - $ executeIn NsOffice "text" - $ liftAsSuccess - $ proc inlines -> do - txt <- read_text -< inlines - state <- getExtraState -< () - returnA -< (post_process txt, getMediaBag state) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs deleted file mode 100644 index 877443543..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ /dev/null @@ -1,260 +0,0 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Generic.Fallible - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Data types and utilities representing failure. Most of it is based on the -"Either" type in its usual configuration (left represents failure). - -In most cases, the failure type is implied or required to be a "Monoid". - -The choice of "Either" instead of a custom type makes it easier to write -compatible instances of "ArrowChoice". --} - --- We export everything -module Text.Pandoc.Readers.Odt.Generic.Fallible where - -import Control.Applicative -import Control.Monad - -import qualified Data.Foldable as F -import Data.Monoid ((<>)) - --- | Default for now. Will probably become a class at some point. -type Failure = () - -type Fallible a = Either Failure a - - --- | False -> Left (), True -> Right () -boolToEither :: Bool -> Fallible () -boolToEither False = Left () -boolToEither True = Right () - --- | False -> Left (), True -> Right () -boolToChoice :: Bool -> Fallible () -boolToChoice False = Left () -boolToChoice True = Right () - --- -maybeToEither :: Maybe a -> Fallible a -maybeToEither (Just a) = Right a -maybeToEither Nothing = Left () - --- -eitherToMaybe :: Either _l a -> Maybe a -eitherToMaybe (Left _) = Nothing -eitherToMaybe (Right a) = Just a - --- | > untagEither === either id id -untagEither :: Either a a -> a -untagEither (Left a) = a -untagEither (Right a) = a - --- | > fromLeft f === either f id -fromLeft :: (a -> b) -> Either a b -> b -fromLeft f (Left a) = f a -fromLeft _ (Right b) = b - --- | > fromRight f === either id f -fromRight :: (a -> b) -> Either b a -> b -fromRight _ (Left b) = b -fromRight f (Right a) = f a - --- | > recover a === fromLeft (const a) === either (const a) id -recover :: a -> Either _f a -> a -recover a (Left _) = a -recover _ (Right a) = a - --- | I would love to use 'fail'. Alas, 'Monad.fail'... -failWith :: failure -> Either failure _x -failWith f = Left f - --- -failEmpty :: (Monoid failure) => Either failure _x -failEmpty = failWith mempty - --- -succeedWith :: a -> Either _x a -succeedWith = Right - --- -collapseEither :: Either failure (Either failure x) - -> Either failure x -collapseEither (Left f ) = Left f -collapseEither (Right (Left f)) = Left f -collapseEither (Right (Right x)) = Right x - --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- both are returned. -chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') -chooseMin = chooseMinWith (,) - --- | If either of the values represents an error, the result is a --- (possibly combined) error. If both values represent a success, --- a combination is returned. -chooseMinWith :: (Monoid a) => (b -> b' -> c) - -> Either a b - -> Either a b' - -> Either a c -chooseMinWith (><) (Right a) (Right b) = Right $ a >< b -chooseMinWith _ (Left a) (Left b) = Left $ a <> b -chooseMinWith _ (Left a) _ = Left a -chooseMinWith _ _ (Left b) = Left b - --- | If either of the values represents a non-error, the result is a --- (possibly combined) non-error. If both values represent an error, an error --- is returned. -chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b -chooseMax = chooseMaxWith (<>) - --- | If either of the values represents a non-error, the result is a --- (possibly combined) non-error. If both values represent an error, an error --- is returned. -chooseMaxWith :: (Monoid a) => (b -> b -> b) - -> Either a b - -> Either a b - -> Either a b -chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b -chooseMaxWith _ (Left a) (Left b) = Left $ a <> b -chooseMaxWith _ (Right a) _ = Right a -chooseMaxWith _ _ (Right b) = Right b - - --- | Class of containers that can escalate contained 'Either's. --- The word "Vector" is meant in the sense of a disease transmitter. -class ChoiceVector v where - spreadChoice :: v (Either f a) -> Either f (v a) - --- Let's do a few examples first - -instance ChoiceVector Maybe where - spreadChoice (Just (Left f)) = Left f - spreadChoice (Just (Right x)) = Right (Just x) - spreadChoice Nothing = Right Nothing - -instance ChoiceVector (Either l) where - spreadChoice (Right (Left f)) = Left f - spreadChoice (Right (Right x)) = Right (Right x) - spreadChoice (Left x ) = Right (Left x) - -instance ChoiceVector ((,) a) where - spreadChoice (_, Left f) = Left f - spreadChoice (x, Right y) = Right (x,y) - -- Wasn't there a newtype somewhere with the elements flipped? - --- --- More instances later, first some discussion. --- --- I'll have to freshen up on type system details to see how (or if) to do --- something like --- --- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where --- > : --- --- But maybe it would be even better to use something like --- --- > class ChoiceVector v v' f | v -> v' f where --- > spreadChoice :: v -> Either f v' --- --- That way, more places in @v@ could spread the cheer, e.g.: --- --- As before: --- -- ( a , Either f b) (a , b) f --- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where --- > spreadChoice (_, Left f) = Left f --- > spreadChoice (a, Right b) = Right (a,b) --- --- But also: --- -- ( Either f a , b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where --- > spreadChoice (Right a,b) = Right (a,b) --- > spreadChoice (Left f,_) = Left f --- --- And maybe even: --- -- ( Either f a , Either f b) (a , b) f --- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where --- > spreadChoice (Right a , Right b) = Right (a,b) --- > spreadChoice (Left f , _ ) = Left f --- > spreadChoice ( _ , Left f) = Left f --- --- Of course that would lead to a lot of overlapping instances... --- But I can't think of a different way. A selector function might help, --- but not even a "Data.Traversable" is powerful enough for that. --- But maybe someone has already solved all this with a lens library. --- --- Well, it's an interesting academic question. But for practical purposes, --- I have more than enough right now. - -instance ChoiceVector ((,,) a b) where - spreadChoice (_,_, Left f) = Left f - spreadChoice (a,b, Right x) = Right (a,b,x) - -instance ChoiceVector ((,,,) a b c) where - spreadChoice (_,_,_, Left f) = Left f - spreadChoice (a,b,c, Right x) = Right (a,b,c,x) - -instance ChoiceVector ((,,,,) a b c d) where - spreadChoice (_,_,_,_, Left f) = Left f - spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) - -instance ChoiceVector (Const a) where - spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types - --- | Fails on the first error -instance ChoiceVector [] where - spreadChoice = sequence -- using the monad instance of Either. - -- Could be generalized to "Data.Traversable" - but why play - -- with UndecidableInstances unless this is really needed. - --- | Wrapper for a list. While the normal list instance of 'ChoiceVector' --- fails whenever it can, this type will never fail. -newtype SuccessList a = SuccessList { collectNonFailing :: [a] } - deriving ( Eq, Ord, Show ) - -instance ChoiceVector SuccessList where - spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing - where unTagRight (Right x) = (x:) - unTagRight _ = id - --- | Like 'catMaybes', but for 'Either'. -collectRights :: [Either _l r] -> [r] -collectRights = collectNonFailing . untag . spreadChoice . SuccessList - where untag = fromLeft (error "Unexpected Left") - --- | A version of 'collectRights' generalized to other containers. The --- container must be both "reducible" and "buildable". Most general containers --- should fullfill these requirements, but there is no single typeclass --- (that I know of) for that. --- Therefore, they are split between 'Foldable' and 'MonadPlus'. --- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) -collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r -collectRightsF = F.foldr unTagRight mzero - where unTagRight (Right x) = mplus $ return x - unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs deleted file mode 100644 index 82ae3e20e..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ /dev/null @@ -1,62 +0,0 @@ -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Generic.Namespaces - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -A class containing a set of namespace identifiers. Used to convert between -typesafe Haskell namespace identifiers and unsafe "real world" namespaces. --} - -module Text.Pandoc.Readers.Odt.Generic.Namespaces where - -import qualified Data.Map as M - --- -type NameSpaceIRI = String - --- -type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI - --- -class (Eq nsID, Ord nsID) => NameSpaceID nsID where - - -- | Given a IRI, possibly update the map and return the id of the namespace. - -- May fail if the namespace is unknown and the application does not - -- allow unknown namespaces. - getNamespaceID :: NameSpaceIRI - -> NameSpaceIRIs nsID - -> Maybe (NameSpaceIRIs nsID, nsID) - -- | Given a namespace id, lookup its IRI. May be overriden for performance. - getIRI :: nsID - -> NameSpaceIRIs nsID - -> Maybe NameSpaceIRI - -- | The root element of an XML document has a namespace, too, and the - -- "XML.Light-parser" is eager to remove the corresponding namespace - -- attribute. - -- As a result, at least this root namespace must be provided. - getInitialIRImap :: NameSpaceIRIs nsID - - getIRI = M.lookup - getInitialIRImap = M.empty diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs deleted file mode 100644 index afd7d616c..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs +++ /dev/null @@ -1,48 +0,0 @@ -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Generic.SetMap - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -A map of values to sets of values. --} - -module Text.Pandoc.Readers.Odt.Generic.SetMap where - -import qualified Data.Map as M -import qualified Data.Set as S - -type SetMap k v = M.Map k (S.Set v) - -empty :: SetMap k v -empty = M.empty - -fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v -fromList = foldr (uncurry insert) empty - -insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v -insert key value setMap = M.insertWith S.union key (S.singleton value) setMap - -union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v -union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3 diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs deleted file mode 100644 index 6c10ed61d..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ /dev/null @@ -1,171 +0,0 @@ -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Reader.Odt.Generic.Utils - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -General utility functions for the odt reader. --} - -module Text.Pandoc.Readers.Odt.Generic.Utils -( uncurry3 -, uncurry4 -, uncurry5 -, uncurry6 -, uncurry7 -, uncurry8 -, swap -, reverseComposition -, bool -, tryToRead -, Lookupable(..) -, readLookupables -, readLookupable -, readPercent -, findBy -, swing -, composition -) where - -import Control.Category ( Category, (>>>), (<<<) ) -import qualified Control.Category as Cat ( id ) -import Control.Monad ( msum ) - -import qualified Data.Foldable as F ( Foldable, foldr ) -import Data.Maybe - - --- | Aequivalent to --- > foldr (.) id --- where '(.)' are 'id' are the ones from "Control.Category" --- and 'foldr' is the one from "Data.Foldable". --- The noun-form was chosen to be consistend with 'sum', 'product' etc --- based on the discussion at --- <https://groups.google.com/forum/#!topic/haskell-cafe/VkOZM1zaHOI> --- (that I was not part of) -composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a -composition = F.foldr (<<<) Cat.id - --- | Aequivalent to --- > foldr (flip (.)) id --- where '(.)' are 'id' are the ones from "Control.Category" --- and 'foldr' is the one from "Data.Foldable". --- A reversed version of 'composition'. -reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a -reverseComposition = F.foldr (>>>) Cat.id - --- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'. --- Note that the first value is selected if the boolean value is 'False'. --- That makes 'bool' consistent with the other two. Also, 'bool' now takes its --- arguments in the exact opposite order compared to the normal if construct. -bool :: a -> a -> Bool -> a -bool x _ False = x -bool _ x True = x - --- | This function often makes it possible to switch values with the functions --- that are applied to them. --- --- Examples: --- > swing map :: [a -> b] -> a -> [b] --- > swing any :: [a -> Bool] -> a -> Bool --- > swing foldr :: b -> a -> [a -> b -> b] -> b --- > swing scanr :: c -> a -> [a -> c -> c] -> c --- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c] --- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool) --- --- Stolen from <https://wiki.haskell.org/Pointfree> -swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d -swing = flip.(.flip id) --- swing f c a = f ($ a) c - - --- | Alternative to 'read'/'reads'. The former of these throws errors --- (nobody wants that) while the latter returns "to much" for simple purposes. --- This function instead applies 'reads' and returns the first match (if any) --- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst - --- | A version of 'reads' that requires a '%' sign after the number -readPercent :: ReadS Int -readPercent s = [ (i,s') | (i , r ) <- reads s - , ("%" , s') <- lex r - ] - --- | Data that can be looked up. --- This is mostly a utility to read data with kind *. -class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - let result = lookup word lookupTable, - isJust result, - let Just a = result - ] - --- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s - -uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z -uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z -uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z -uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z -uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z -uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z - -uncurry3 fun (a,b,c ) = fun a b c -uncurry4 fun (a,b,c,d ) = fun a b c d -uncurry5 fun (a,b,c,d,e ) = fun a b c d e -uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f -uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g -uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h - -swap :: (a,b) -> (b,a) -swap (a,b) = (b,a) - --- | A version of "Data.List.find" that uses a converter to a Maybe instance. --- The returned value is the first which the converter returns in a 'Just' --- wrapper. -findBy :: (a -> Maybe b) -> [a] -> Maybe b -findBy _ [] = Nothing -findBy f ((f -> Just x):_ ) = Just x -findBy f ( _:xs) = findBy f xs - diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs deleted file mode 100644 index 8c03d1a09..000000000 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ /dev/null @@ -1,1063 +0,0 @@ -{-# LANGUAGE Arrows #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -A generalized XML parser based on stateful arrows. -It might be sufficient to define this reader as a comonad, but there is -not a lot of use in trying. --} - -module Text.Pandoc.Readers.Odt.Generic.XMLConverter -( ElementName -, XMLConverterState -, XMLConverter -, FallibleXMLConverter -, swapPosition -, runConverter -, runConverter'' -, runConverter' -, runConverterF' -, runConverterF -, getCurrentElement -, getExtraState -, setExtraState -, modifyExtraState -, convertingExtraState -, producingExtraState -, lookupNSiri -, lookupNSprefix -, readNSattributes -, elemName -, elemNameIs -, strContent -, elContent -, currentElem -, currentElemIs -, expectElement -, elChildren -, findChildren -, filterChildren -, filterChildrenName -, findChild' -, findChild -, filterChild' -, filterChild -, filterChildName' -, filterChildName -, isSet -, isSet' -, isSetWithDefault -, hasAttrValueOf' -, failIfNotAttrValueOf -, isThatTheAttrValue -, searchAttrIn -, searchAttrWith -, searchAttr -, lookupAttr -, lookupAttr' -, lookupAttrWithDefault -, lookupDefaultingAttr -, findAttr' -, findAttr -, findAttrWithDefault -, readAttr -, readAttr' -, readAttrWithDefault -, getAttr --- , (>/<) --- , (?>/<) -, executeIn -, collectEvery -, withEveryL -, withEvery -, tryAll -, tryAll' -, IdXMLConverter -, MaybeEConverter -, ElementMatchConverter -, MaybeCConverter -, ContentMatchConverter -, makeMatcherE -, makeMatcherC -, prepareMatchersE -, prepareMatchersC -, matchChildren -, matchContent'' -, matchContent' -, matchContent -) where - -import Control.Applicative hiding ( liftA, liftA2 ) -import Control.Monad ( MonadPlus ) -import Control.Arrow - -import qualified Data.Map as M -import qualified Data.Foldable as F -import Data.Default -import Data.Maybe - -import qualified Text.XML.Light as XML - -import Text.Pandoc.Readers.Odt.Arrows.State -import Text.Pandoc.Readers.Odt.Arrows.Utils - -import Text.Pandoc.Readers.Odt.Generic.Namespaces -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible - --------------------------------------------------------------------------------- --- Basis types for readability --------------------------------------------------------------------------------- - --- -type ElementName = String -type AttributeName = String -type AttributeValue = String - --- -type NameSpacePrefix = String - --- -type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix - --------------------------------------------------------------------------------- --- Main converter state --------------------------------------------------------------------------------- - --- GADT so some of the NameSpaceID restrictions can be deduced -data XMLConverterState nsID extraState where - XMLConverterState :: NameSpaceID nsID - => { -- | A stack of parent elements. The top element is the current one. - -- Arguably, a real Zipper would be better. But that is an - -- optimization that can be made at a later time, e.g. when - -- replacing Text.XML.Light. - parentElements :: [XML.Element] - -- | A map from internal namespace IDs to the namespace prefixes - -- used in XML elements - , namespacePrefixes :: NameSpacePrefixes nsID - -- | A map from internal namespace IDs to namespace IRIs - -- (Only necessary for matching namespace IDs and prefixes) - , namespaceIRIs :: NameSpaceIRIs nsID - -- | A place to put "something else". This feature is used heavily - -- to keep the main code cleaner. More specifically, the main reader - -- is divided into different stages. Each stage lifts something up - -- here, which the next stage can then use. This could of course be - -- generalized to a state-tree or used for the namespace IRIs. The - -- border between states and values is an imaginary one, after all. - -- But the separation as it is seems to be enough for now. - , moreState :: extraState - } - -> XMLConverterState nsID extraState - --- -createStartState :: (NameSpaceID nsID) - => XML.Element - -> extraState - -> XMLConverterState nsID extraState -createStartState element extraState = - XMLConverterState - { parentElements = [element] - , namespacePrefixes = M.empty - , namespaceIRIs = getInitialIRImap - , moreState = extraState - } - --- | Functor over extra state -instance Functor (XMLConverterState nsID) where - fmap f ( XMLConverterState parents prefixes iRIs extraState ) - = XMLConverterState parents prefixes iRIs (f extraState) - --- -replaceExtraState :: extraState - -> XMLConverterState nsID _x - -> XMLConverterState nsID extraState -replaceExtraState x s - = fmap (const x) s - --- -currentElement :: XMLConverterState nsID extraState - -> XML.Element -currentElement state = head (parentElements state) - --- | Replace the current position by another, modifying the extra state --- in the process -swapPosition :: (extraState -> extraState') - -> [XML.Element] - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState' -swapPosition f stack state - = state { parentElements = stack - , moreState = f (moreState state) - } - --- | Replace the current position by another, modifying the extra state --- in the process -swapStack' :: XMLConverterState nsID extraState - -> [XML.Element] - -> ( XMLConverterState nsID extraState , [XML.Element] ) -swapStack' state stack - = ( state { parentElements = stack } - , parentElements state - ) - --- -pushElement :: XML.Element - -> XMLConverterState nsID extraState - -> XMLConverterState nsID extraState -pushElement e state = state { parentElements = e:(parentElements state) } - --- | Pop the top element from the call stack, unless it is the last one. -popElement :: XMLConverterState nsID extraState - -> Maybe (XMLConverterState nsID extraState) -popElement state - | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es } - | otherwise = Nothing - --------------------------------------------------------------------------------- --- Main type --------------------------------------------------------------------------------- - --- It might be a good idea to pack the converters in a GADT --- Downside: data instead of type --- Upside: 'Failure' could be made a parameter as well. - --- -type XMLConverter nsID extraState input output - = ArrowState (XMLConverterState nsID extraState ) input output - -type FallibleXMLConverter nsID extraState input output - = XMLConverter nsID extraState input (Fallible output) - --- -runConverter :: XMLConverter nsID extraState input output - -> XMLConverterState nsID extraState - -> input - -> output -runConverter converter state input = snd $ runArrowState converter (state,input) - --- -runConverter'' :: (NameSpaceID nsID) - => XMLConverter nsID extraState (Fallible ()) output - -> extraState - -> XML.Element - -> output -runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () - -runConverter' :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState () success - -> extraState - -> XML.Element - -> Fallible success -runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () - --- -runConverterF' :: FallibleXMLConverter nsID extraState x y - -> XMLConverterState nsID extraState - -> Fallible x -> Fallible y -runConverterF' a s e = runConverter (returnV e >>? a) s e - --- -runConverterF :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState XML.Element x - -> extraState - -> Fallible XML.Element -> Fallible x -runConverterF a s = either failWith - (\e -> runConverter a (createStartState e s) e) - --- -getCurrentElement :: XMLConverter nsID extraState x XML.Element -getCurrentElement = extractFromState currentElement - --- -getExtraState :: XMLConverter nsID extraState x extraState -getExtraState = extractFromState moreState - --- -setExtraState :: XMLConverter nsID extraState extraState extraState -setExtraState = withState $ \state extra - -> (replaceExtraState extra state , extra) - - --- | Lifts a function to the extra state. -modifyExtraState :: (extraState -> extraState) - -> XMLConverter nsID extraState x x -modifyExtraState = modifyState.fmap - - --- | First sets the extra state to the new value. Then modifies the original --- extra state with a converter that uses the new state. Finally, the --- intermediate state is dropped and the extra state is lifted into the --- state as it was at the beginning of the function. --- As a result, exactly the extra state and nothing else is changed. --- The resulting converter even behaves like an identity converter on the --- value level. --- --- (The -ing form is meant to be mnemonic in a sequence of arrows as in --- convertingExtraState () converter >>> doOtherStuff) --- -convertingExtraState :: extraState' - -> FallibleXMLConverter nsID extraState' extraState extraState - -> FallibleXMLConverter nsID extraState x x -convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA - where - setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v - modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?% flip replaceExtraState - --- | First sets the extra state to the new value. Then produces a new --- extra state with a converter that uses the new state. Finally, the --- intermediate state is dropped and the extra state is lifted into the --- state as it was at the beginning of the function. --- As a result, exactly the extra state and nothing else is changed. --- The resulting converter even behaves like an identity converter on the --- value level. --- --- Aequivalent to --- --- > \v x a -> convertingExtraState v (returnV x >>> a) --- --- (The -ing form is meant to be mnemonic in a sequence of arrows as in --- producingExtraState () () producer >>> doOtherStuff) --- -producingExtraState :: extraState' - -> a - -> FallibleXMLConverter nsID extraState' a extraState - -> FallibleXMLConverter nsID extraState x x -producingExtraState v x a = convertingExtraState v (returnV x >>> a) - - --------------------------------------------------------------------------------- --- Work in namespaces --------------------------------------------------------------------------------- - --- | Arrow version of 'getIRI' -lookupNSiri :: (NameSpaceID nsID) - => nsID - -> XMLConverter nsID extraState x (Maybe NameSpaceIRI) -lookupNSiri nsID = extractFromState - $ \state -> getIRI nsID $ namespaceIRIs state - --- -lookupNSprefix :: (NameSpaceID nsID) - => nsID - -> XMLConverter nsID extraState x (Maybe NameSpacePrefix) -lookupNSprefix nsID = extractFromState - $ \state -> M.lookup nsID $ namespacePrefixes state - --- | Extracts namespace attributes from the current element and tries to --- update the current mapping accordingly -readNSattributes :: (NameSpaceID nsID) - => FallibleXMLConverter nsID extraState x () -readNSattributes = fromState $ \state -> maybe (state, failEmpty ) - ( , succeedWith ()) - (extractNSAttrs state ) - where - extractNSAttrs :: (NameSpaceID nsID) - => XMLConverterState nsID extraState - -> Maybe (XMLConverterState nsID extraState) - extractNSAttrs startState - = foldl (\state d -> state >>= addNS d) - (Just startState) - nsAttribs - where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) - element = currentElement startState - readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri) - = Just (name, iri) - readNSattr _ = Nothing - addNS (prefix, iri) state = fmap updateState - $ getNamespaceID iri - $ namespaceIRIs state - where updateState (iris,nsID) - = state { namespaceIRIs = iris - , namespacePrefixes = M.insert nsID prefix - $ namespacePrefixes state - } - --------------------------------------------------------------------------------- --- Common namespace accessors --------------------------------------------------------------------------------- - --- | Given a namespace id and an element name, creates a 'XML.QName' for --- internal use -elemName :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x XML.QName -elemName nsID name = lookupNSiri nsID - &&& lookupNSprefix nsID - >>% XML.QName name - --- | Checks if a given element matches both a specified namespace id --- and a specified element name -elemNameIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName - where hasThatName e iri = let elName = XML.elName e - in XML.qName elName == name - && XML.qURI elName == iri - --------------------------------------------------------------------------------- --- General content --------------------------------------------------------------------------------- - --- -strContent :: XMLConverter nsID extraState x String -strContent = getCurrentElement - >>^ XML.strContent - --- -elContent :: XMLConverter nsID extraState x [XML.Content] -elContent = getCurrentElement - >>^ XML.elContent - --------------------------------------------------------------------------------- --- Current element --------------------------------------------------------------------------------- - --- -currentElem :: XMLConverter nsID extraState x (XML.QName) -currentElem = getCurrentElement - >>^ XML.elName - -currentElemIs :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x Bool -currentElemIs nsID name = getCurrentElement - >>> elemNameIs nsID name - - - -{- -currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> - (XML.qName >>^ (&&).(== name) ) - ^&&&^ - (XML.qIRI >>^ (==) ) - ) >>% (.) - ) &&& lookupNSiri nsID >>% ($) --} - --- -expectElement :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x () -expectElement nsID name = currentElemIs nsID name - >>^ boolToChoice - --------------------------------------------------------------------------------- --- Chilren --------------------------------------------------------------------------------- - --- -elChildren :: XMLConverter nsID extraState x [XML.Element] -elChildren = getCurrentElement - >>^ XML.elChildren - --- -findChildren :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState x [XML.Element] -findChildren nsID name = elemName nsID name - &&& getCurrentElement - >>% XML.findChildren - --- -filterChildren :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildren p = getCurrentElement - >>^ XML.filterChildren p - --- -filterChildrenName :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x [XML.Element] -filterChildrenName p = getCurrentElement - >>^ XML.filterChildrenName p - --- -findChild' :: (NameSpaceID nsID) - => nsID - -> ElementName - -> XMLConverter nsID extraState x (Maybe XML.Element) -findChild' nsID name = elemName nsID name - &&& getCurrentElement - >>% XML.findChild - --- -findChild :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState x XML.Element -findChild nsID name = findChild' nsID name - >>> maybeToChoice - --- -filterChild' :: (XML.Element -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChild' p = getCurrentElement - >>^ XML.filterChild p - --- -filterChild :: (XML.Element -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChild p = filterChild' p - >>> maybeToChoice - --- -filterChildName' :: (XML.QName -> Bool) - -> XMLConverter nsID extraState x (Maybe XML.Element) -filterChildName' p = getCurrentElement - >>^ XML.filterChildName p - --- -filterChildName :: (XML.QName -> Bool) - -> FallibleXMLConverter nsID extraState x XML.Element -filterChildName p = filterChildName' p - >>> maybeToChoice - - --------------------------------------------------------------------------------- --- Attributes --------------------------------------------------------------------------------- - --- -isSet :: (NameSpaceID nsID) - => nsID -> AttributeName - -> (Either Failure Bool) - -> FallibleXMLConverter nsID extraState x Bool -isSet nsID attrName deflt - = findAttr' nsID attrName - >>^ maybe deflt stringToBool - --- -isSet' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe Bool) -isSet' nsID attrName = findAttr' nsID attrName - >>^ (>>= stringToBool') - -isSetWithDefault :: (NameSpaceID nsID) - => nsID -> AttributeName - -> Bool - -> XMLConverter nsID extraState x Bool -isSetWithDefault nsID attrName def' - = isSet' nsID attrName - >>^ fromMaybe def' - --- -hasAttrValueOf' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x Bool -hasAttrValueOf' nsID attrName attrValue - = findAttr nsID attrName - >>> ( const False ^|||^ (==attrValue)) - --- -failIfNotAttrValueOf :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> FallibleXMLConverter nsID extraState x () -failIfNotAttrValueOf nsID attrName attrValue - = hasAttrValueOf' nsID attrName attrValue - >>^ boolToChoice - --- | Is the value that is currently transported in the arrow the value of --- the specified attribute? -isThatTheAttrValue :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState AttributeValue Bool -isThatTheAttrValue nsID attrName - = keepingTheValue - (findAttr nsID attrName) - >>% right.(==) - --- | Lookup value in a dictionary, fail if no attribute found or value --- not in dictionary -searchAttrIn :: (NameSpaceID nsID) - => nsID -> AttributeName - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrIn nsID attrName dict - = findAttr nsID attrName - >>?^? maybeToChoice.(`lookup` dict ) - - --- | Lookup value in a dictionary. Fail if no attribute found. If value not in --- dictionary, return default value -searchAttrWith :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> FallibleXMLConverter nsID extraState x a -searchAttrWith nsID attrName defV dict - = findAttr nsID attrName - >>?^ (fromMaybe defV).(`lookup` dict ) - --- | Lookup value in a dictionary. If attribute or value not found, --- return default value -searchAttr :: (NameSpaceID nsID) - => nsID -> AttributeName - -> a - -> [(AttributeValue,a)] - -> XMLConverter nsID extraState x a -searchAttr nsID attrName defV dict - = searchAttrIn nsID attrName dict - >>> const defV ^|||^ id - --- | Read a 'Lookupable' attribute. Fail if no match. -lookupAttr :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x a -lookupAttr nsID attrName = lookupAttr' nsID attrName - >>^ maybeToChoice - - --- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'. -lookupAttr' :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe a) -lookupAttr' nsID attrName - = findAttr' nsID attrName - >>^ (>>= readLookupable) - --- | Read a 'Lookupable' attribute with explicit default -lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a) - => nsID -> AttributeName - -> a - -> XMLConverter nsID extraState x a -lookupAttrWithDefault nsID attrName deflt - = lookupAttr' nsID attrName - >>^ fromMaybe deflt - --- | Read a 'Lookupable' attribute with implicit default -lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) - => nsID -> AttributeName - -> XMLConverter nsID extraState x a -lookupDefaultingAttr nsID attrName - = lookupAttrWithDefault nsID attrName def - --- | Return value as a (Maybe String) -findAttr' :: (NameSpaceID nsID) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe AttributeValue) -findAttr' nsID attrName = elemName nsID attrName - &&& getCurrentElement - >>% XML.findAttr - --- | Return value as string or fail -findAttr :: (NameSpaceID nsID) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x AttributeValue -findAttr nsID attrName = findAttr' nsID attrName - >>> maybeToChoice - --- | Return value as string or return provided default value -findAttrWithDefault :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x AttributeValue -findAttrWithDefault nsID attrName deflt - = findAttr' nsID attrName - >>^ fromMaybe deflt - --- | Read and return value or fail -readAttr :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> FallibleXMLConverter nsID extraState x attrValue -readAttr nsID attrName = readAttr' nsID attrName - >>> maybeToChoice - --- | Read and return value or return Nothing -readAttr' :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> XMLConverter nsID extraState x (Maybe attrValue) -readAttr' nsID attrName = findAttr' nsID attrName - >>^ (>>= tryToRead) - --- | Read and return value or return provided default value -readAttrWithDefault :: (NameSpaceID nsID, Read attrValue) - => nsID -> AttributeName - -> attrValue - -> XMLConverter nsID extraState x attrValue -readAttrWithDefault nsID attrName deflt - = findAttr' nsID attrName - >>^ (>>= tryToRead) - >>^ fromMaybe deflt - --- | Read and return value or return default value from 'Default' instance -getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue) - => nsID -> AttributeName - -> XMLConverter nsID extraState x attrValue -getAttr nsID attrName = readAttrWithDefault nsID attrName def - --------------------------------------------------------------------------------- --- Movements --------------------------------------------------------------------------------- - --- -jumpThere :: XMLConverter nsID extraState XML.Element XML.Element -jumpThere = withState (\state element - -> ( pushElement element state , element ) - ) - --- -swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element] -swapStack = withState swapStack' - --- -jumpBack :: FallibleXMLConverter nsID extraState _x _x -jumpBack = tryModifyState (popElement >>> maybeToChoice) - --- | Support function for "procedural" converters: jump to an element, execute --- a converter, jump back. --- This version is safer than 'executeThere', because it does not rely on the --- internal stack. As a result, the converter can not move around in arbitrary --- ways. The downside is of course that some of the environment is not --- accessible to the converter. -switchingTheStack :: XMLConverter nsID moreState a b - -> XMLConverter nsID moreState (a, XML.Element) b -switchingTheStack a = second ( (:[]) ^>> swapStack ) - >>> first a - >>> second swapStack - >>^ fst - --- | Support function for "procedural" converters: jumps to an element, executes --- a converter, jumps back. --- Make sure that the converter is well-behaved; that is it should --- return to the exact position it started from in /every possible path/ of --- execution, even if it "fails". If it does not, you may encounter --- strange bugs. If you are not sure about the behaviour or want to use --- shortcuts, you can often use 'switchingTheStack' instead. -executeThere :: FallibleXMLConverter nsID moreState a b - -> FallibleXMLConverter nsID moreState (a, XML.Element) b -executeThere a = second jumpThere - >>> fst - ^>> a - >>> jumpBack -- >>? jumpBack would not ensure the jump. - >>^ collapseEither - --- | Do something in a sub-element, tnen come back -executeIn :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState f s - -> FallibleXMLConverter nsID extraState f s -executeIn nsID name a = keepingTheValue - (findChild nsID name) - >>> ignoringState liftFailure - >>? switchingTheStack a - where liftFailure (_, (Left f)) = Left f - liftFailure (x, (Right e)) = Right (x, e) - --------------------------------------------------------------------------------- --- Iterating over children --------------------------------------------------------------------------------- - --- Helper converter to prepare different types of iterations. --- It lifts the children (of a certain type) of the current element --- into the value level and pairs each one with the current input value. -prepareIteration :: (NameSpaceID nsID) - => nsID -> ElementName - -> XMLConverter nsID extraState b [(b, XML.Element)] -prepareIteration nsID name = keepingTheValue - (findChildren nsID name) - >>% distributeValue - --- | Applies a converter to every child element of a specific type. --- Collects results in a 'Monoid'. --- Fails completely if any conversion fails. -collectEvery :: (NameSpaceID nsID, Monoid m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a m - -> FallibleXMLConverter nsID extraState a m -collectEvery nsID name a = prepareIteration nsID name - >>> foldS' (switchingTheStack a) - --- -withEveryL :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a b - -> FallibleXMLConverter nsID extraState a [b] -withEveryL = withEvery - --- | Applies a converter to every child element of a specific type. --- Collects results in a 'MonadPlus'. --- Fails completely if any conversion fails. -withEvery :: (NameSpaceID nsID, MonadPlus m) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a b - -> FallibleXMLConverter nsID extraState a (m b) -withEvery nsID name a = prepareIteration nsID name - >>> iterateS' (switchingTheStack a) - --- | Applies a converter to every child element of a specific type. --- Collects all successful results in a list. -tryAll :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b [a] -tryAll nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ collectRights - --- | Applies a converter to every child element of a specific type. --- Collects all successful results. -tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState b a - -> XMLConverter nsID extraState b (c a) -tryAll' nsID name a = prepareIteration nsID name - >>> iterateS (switchingTheStack a) - >>^ collectRightsF - --------------------------------------------------------------------------------- --- Matching children --------------------------------------------------------------------------------- - -type IdXMLConverter nsID moreState x - = XMLConverter nsID moreState x x - -type MaybeEConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) - --- Chainable converter that helps deciding which converter to actually use. -type ElementMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeEConverter nsID extraState x, XML.Element) - -type MaybeCConverter nsID moreState x - = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) - --- Chainable converter that helps deciding which converter to actually use. -type ContentMatchConverter nsID extraState x - = IdXMLConverter nsID - extraState - (MaybeCConverter nsID extraState x, XML.Content) - --- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML elements to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherE :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ElementMatchConverter nsID extraState a -makeMatcherE nsID name c = ( second ( - elemNameIs nsID name - >>^ bool Nothing (Just tryC) - ) - >>% (<|>) - ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd - --- Helper function: The @c@ is actually a converter that is to be selected by --- matching XML content to the first two parameters. --- The fold used to match elements however is very simple, so to use it, --- this function wraps the converter in another converter that unifies --- the accumulator. Think of a lot of converters with the resulting type --- chained together. The accumulator not only transports the element --- unchanged to the next matcher, it also does the actual selecting by --- combining the intermediate results with '(<|>)'. -makeMatcherC :: (NameSpaceID nsID) - => nsID -> ElementName - -> FallibleXMLConverter nsID extraState a a - -> ContentMatchConverter nsID extraState a -makeMatcherC nsID name c = ( second ( contentToElem - >>> returnV Nothing - ||| ( elemNameIs nsID name - >>^ bool Nothing (Just cWithJump) - ) - ) - >>% (<|>) - ) &&&^ snd - where cWithJump = ( fst - ^&&& ( second contentToElem - >>> spreadChoice - ^>>? executeThere c - ) - >>% recover) - &&&^ snd - contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element - contentToElem = arr $ \e -> case e of - XML.Elem e' -> succeedWith e' - _ -> failEmpty - --- Creates and chains a bunch of matchers -prepareMatchersE :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ElementMatchConverter nsID extraState x ---prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) -prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) - --- Creates and chains a bunch of matchers -prepareMatchersC :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] - -> ContentMatchConverter nsID extraState x ---prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC) -prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) - --- | Takes a list of element-data - converter groups and --- * Finds all children of the current element --- * Matches each group to each child in order (at most one group per child) --- * Filters non-matched children --- * Chains all found converters in child-order --- * Applies the chain to the input element -matchChildren :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchChildren lookups = let matcher = prepareMatchersE lookups - in keepingTheValue ( - elChildren - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the element and drop the element - -- in the return value - swallowElem element converter = (,element) ^>> converter >>^ fst - --- -matchContent'' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent'' lookups = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowContent content converter = (,content) ^>> converter >>^ fst - - --- | Takes a list of element-data - converter groups and --- * Finds all content of the current element --- * Matches each group to each piece of content in order --- (at most one group per piece of content) --- * Filters non-matched content --- * Chains all found converters in content-order --- * Applies the chain to the input element -matchContent' :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState a a -matchContent' lookups = matchContent lookups (arr fst) - --- | Takes a list of element-data - converter groups and --- * Finds all content of the current element --- * Matches each group to each piece of content in order --- (at most one group per piece of content) --- * Adds a default converter for all non-matched content --- * Chains all found converters in content-order --- * Applies the chain to the input element -matchContent :: (NameSpaceID nsID) - => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] - -> XMLConverter nsID extraState (a,XML.Content) a - -> XMLConverter nsID extraState a a -matchContent lookups fallback - = let matcher = prepareMatchersC lookups - in keepingTheValue ( - elContent - >>> map (Nothing,) - ^>> iterateSL matcher - >>^ map swallowOrFallback - -- >>> foldSs - >>> reverseComposition - ) - >>> swap - ^>> app - where - -- let the converter swallow the content and drop the content - -- in the return value - swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst - swallowOrFallback (Nothing ,content) = (,content) ^>> fallback - --------------------------------------------------------------------------------- --- Internals --------------------------------------------------------------------------------- - -stringToBool :: (Monoid failure) => String -> Either failure Bool -stringToBool val -- stringToBool' val >>> maybeToChoice - | val `elem` trueValues = succeedWith True - | val `elem` falseValues = succeedWith False - | otherwise = failEmpty - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - -stringToBool' :: String -> Maybe Bool -stringToBool' val | val `elem` trueValues = Just True - | val `elem` falseValues = Just False - | otherwise = Nothing - where trueValues = ["true" ,"on" ,"1"] - falseValues = ["false","off","0"] - - -distributeValue :: a -> [b] -> [(a,b)] -distributeValue = map.(,) - --------------------------------------------------------------------------------- - -{- -NOTES -It might be a good idea to refactor the namespace stuff. -E.g.: if a namespace constructor took a string as a parameter, things like -> a ?>/< (NsText,"body") -would be nicer. -Together with a rename and some trickery, something like -> |< NsText "body" >< NsText "p" ?> a </> </>| -might even be possible. - -Some day, XML.Light should be replaced by something better. -While doing that, it might be useful to replace String as the type of element -names with something else, too. (Of course with OverloadedStrings). -While doing that, maybe the types can be created in a way that something like -> NsText:"body" -could be used. Overloading (:) does not sounds like the best idea, but if the -element name type was a list, this might be possible. -Of course that would be a bit hackish, so the "right" way would probably be -something like -> InNS NsText "body" -but isn't that a bit boring? ;) --} diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs deleted file mode 100644 index deb009998..000000000 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ /dev/null @@ -1,110 +0,0 @@ -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Reader.Odt.Namespaces - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Namespaces used in odt files. --} - -module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) - ) where - -import Data.List ( isPrefixOf ) -import Data.Maybe ( fromMaybe, listToMaybe ) -import qualified Data.Map as M ( empty, insert ) - -import Text.Pandoc.Readers.Odt.Generic.Namespaces - - -instance NameSpaceID Namespace where - - getInitialIRImap = nsIDmap - - getNamespaceID "" m = Just(m, NsXML) - getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri) - where asPair nsID = Just (M.insert nsID iri m, nsID) - - -findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri] - -nsIDmap :: NameSpaceIRIs Namespace -nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs - -data Namespace = -- Open Document core - NsOffice | NsStyle | NsText | NsTable | NsForm - | NsDraw | Ns3D | NsAnim | NsChart | NsConfig - | NsDB | NsMeta | NsNumber | NsScript | NsManifest - | NsPresentation - -- Metadata - | NsODF - -- Compatible elements - | NsXSL_FO | NsSVG | NsSmil - -- External standards - | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL - | NsDublinCore - -- Metadata manifest - | NsPKG - -- Others - | NsOpenFormula - -- Core XML (basically only for the 'id'-attribute) - | NsXML - -- Fallback - | NsOther String - deriving ( Eq, Ord, Show ) - --- | Not the actual iri's, but large prefixes of them - this way there are --- less versioning problems and the like. -nsIDs :: [(String,Namespace)] -nsIDs = [ - ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), - ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), - ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ), - ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ), - ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ), - ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ), - ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ), - ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ), - ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ), - ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ), - ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ), - ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ), - ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ), - ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ), - ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ), - ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ), - ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ), - ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ), - ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ), - ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ), - ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ), - ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ), - ("http://purl.org/dc/elements" , NsDublinCore ), - ("http://www.w3.org/2003/g/data-view" , NsGRDDL ), - ("http://www.w3.org/1998/Math/MathML" , NsMathML ), - ("http://www.w3.org/1999/xhtml" , NsXHtml ), - ("http://www.w3.org/2002/xforms" , NsXForms ), - ("http://www.w3.org/1999/xlink" , NsXLink ) - ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs deleted file mode 100644 index 26ba6df82..000000000 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ /dev/null @@ -1,744 +0,0 @@ -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE Arrows #-} - -{- -Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Odt.StyleReader - Copyright : Copyright (C) 2015 Martin Linnemann - License : GNU GPL, version 2 or above - - Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com> - Stability : alpha - Portability : portable - -Reader for the style information in an odt document. --} - -module Text.Pandoc.Readers.Odt.StyleReader -( Style (..) -, StyleName -, StyleFamily (..) -, Styles (..) -, StyleProperties (..) -, TextProperties (..) -, ParaProperties (..) -, VerticalTextPosition (..) -, ListItemNumberFormat (..) -, ListLevel -, ListStyle (..) -, ListLevelStyle (..) -, ListLevelType (..) -, LengthOrPercent (..) -, lookupStyle -, getTextProperty -, getTextProperty' -, getParaProperty -, getListStyle -, getListLevelStyle -, getStyleFamily -, lookupDefaultStyle -, lookupDefaultStyle' -, lookupListStyleByName -, getPropertyChain -, textPropertyChain -, stylePropertyChain -, stylePropertyChain' -, getStylePropertyChain -, extendedStylePropertyChain -, extendedStylePropertyChain' -, liftStyles -, readStylesAt -) where - -import Control.Arrow -import Control.Applicative hiding ( liftA, liftA2, liftA3 ) - -import qualified Data.Foldable as F -import qualified Data.Map as M -import qualified Data.Set as S -import Data.Char ( isDigit ) -import Data.Default -import Data.List ( unfoldr ) -import Data.Maybe - -import qualified Text.XML.Light as XML - -import Text.Pandoc.Readers.Odt.Arrows.State -import Text.Pandoc.Readers.Odt.Arrows.Utils - -import Text.Pandoc.Readers.Odt.Generic.Utils -import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.XMLConverter - -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.Base - - -readStylesAt :: XML.Element -> Fallible Styles -readStylesAt e = runConverter' readAllStyles mempty e - --------------------------------------------------------------------------------- --- Reader for font declarations and font pitches --------------------------------------------------------------------------------- - --- Pandoc has no support for different font pitches. Yet knowing them can be --- very helpful in cases where Pandoc has more semantics than OpenDocument. --- In these cases, the pitch can help deciding as what to define a block of --- text. So let's start with a type for font pitches: - -data FontPitch = PitchVariable | PitchFixed - deriving ( Eq, Show ) - -instance Lookupable FontPitch where - lookupTable = [ ("variable" , PitchVariable) - , ("fixed" , PitchFixed ) - ] - -instance Default FontPitch where - def = PitchVariable - --- The font pitch can be specifed in a style directly. Normally, however, --- it is defined in the font. That is also the specs' recommendation. --- --- Thus, we want - -type FontFaceName = String - -type FontPitches = M.Map FontFaceName FontPitch - --- To get there, the fonts have to be read and the pitches extracted. --- But the resulting map are only needed at one later place, so it should not be --- transported on the value level, especially as we already use a state arrow. --- So instead, the resulting map is lifted into the state of the reader. --- (An alternative might be ImplicitParams, but again, we already have a state.) --- --- So the main style readers will have the types -type StyleReader a b = XMLReader FontPitches a b --- and -type StyleReaderSafe a b = XMLReaderSafe FontPitches a b --- respectively. --- --- But before we can work with these, we need to define the reader that reads --- the fonts: - --- | A reader for font pitches -fontPitchReader :: XMLReader _s _x FontPitches -fontPitchReader = executeIn NsOffice "font-face-decls" ( - ( withEveryL NsStyle "font-face" $ liftAsSuccess ( - findAttr' NsStyle "name" - &&& - lookupDefaultingAttr NsStyle "font-pitch" - ) - ) - >>?^ ( M.fromList . (foldl accumLegalPitches []) ) - ) - where accumLegalPitches ls (Nothing,_) = ls - accumLegalPitches ls (Just n,p) = (n,p):ls - - --- | A wrapper around the font pitch reader that lifts the result into the --- state. -readFontPitches :: StyleReader x x -readFontPitches = producingExtraState () () fontPitchReader - - --- | Looking up a pitch in the state of the arrow. --- --- The function does the following: --- * Look for the font pitch in an attribute. --- * If that fails, look for the font name, look up the font in the state --- and use the pitch from there. --- * Return the result in a Maybe --- -findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) -findPitch = ( lookupAttr NsStyle "font-pitch" - `ifFailedDo` findAttr NsStyle "font-name" - >>? ( keepingTheValue getExtraState - >>% M.lookup - >>^ maybeToChoice - ) - ) - >>> choiceToMaybe - --------------------------------------------------------------------------------- --- Definitions of main data --------------------------------------------------------------------------------- - -type StyleName = String - --- | There are two types of styles: named styles with a style family and an --- optional style parent, and default styles for each style family, --- defining default style properties -data Styles = Styles - { stylesByName :: M.Map StyleName Style - , listStylesByName :: M.Map StyleName ListStyle - , defaultStyleMap :: M.Map StyleFamily StyleProperties - } - deriving ( Show ) - --- Styles from a monoid under union -instance Monoid Styles where - mempty = Styles M.empty M.empty M.empty - mappend (Styles sBn1 dSm1 lsBn1) - (Styles sBn2 dSm2 lsBn2) - = Styles (M.union sBn1 sBn2) - (M.union dSm1 dSm2) - (M.union lsBn1 lsBn2) - --- Not all families from the specifications are implemented, only those we need. --- But there are none that are not mentioned here. -data StyleFamily = FaText | FaParagraph --- | FaTable | FaTableCell | FaTableColumn | FaTableRow --- | FaGraphic | FaDrawing | FaChart --- | FaPresentation --- | FaRuby - deriving ( Eq, Ord, Show ) - -instance Lookupable StyleFamily where - lookupTable = [ ( "text" , FaText ) - , ( "paragraph" , FaParagraph ) --- , ( "table" , FaTable ) --- , ( "table-cell" , FaTableCell ) --- , ( "table-column" , FaTableColumn ) --- , ( "table-row" , FaTableRow ) --- , ( "graphic" , FaGraphic ) --- , ( "drawing-page" , FaDrawing ) --- , ( "chart" , FaChart ) --- , ( "presentation" , FaPresentation ) --- , ( "ruby" , FaRuby ) - ] - --- | A named style -data Style = Style { styleFamily :: Maybe StyleFamily - , styleParentName :: Maybe StyleName - , listStyle :: Maybe StyleName - , styleProperties :: StyleProperties - } - deriving ( Eq, Show ) - -data StyleProperties = SProps { textProperties :: Maybe TextProperties - , paraProperties :: Maybe ParaProperties --- , tableColProperties :: Maybe TColProperties --- , tableRowProperties :: Maybe TRowProperties --- , tableCellProperties :: Maybe TCellProperties --- , tableProperties :: Maybe TableProperties --- , graphicProperties :: Maybe GraphProperties - } - deriving ( Eq, Show ) - -instance Default StyleProperties where - def = SProps { textProperties = Just def - , paraProperties = Just def - } - -data TextProperties = PropT { isEmphasised :: Bool - , isStrong :: Bool - , pitch :: Maybe FontPitch - , verticalPosition :: VerticalTextPosition - , underline :: Maybe UnderlineMode - , strikethrough :: Maybe UnderlineMode - } - deriving ( Eq, Show ) - -instance Default TextProperties where - def = PropT { isEmphasised = False - , isStrong = False - , pitch = Just def - , verticalPosition = def - , underline = Nothing - , strikethrough = Nothing - } - -data ParaProperties = PropP { paraNumbering :: ParaNumbering - , indentation :: LengthOrPercent - , margin_left :: LengthOrPercent - } - deriving ( Eq, Show ) - -instance Default ParaProperties where - def = PropP { paraNumbering = NumberingNone - , indentation = def - , margin_left = def - } - ----- --- All the little data types that make up the properties ----- - -data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub - deriving ( Eq, Show ) - -instance Default VerticalTextPosition where - def = VPosNormal - -instance Read VerticalTextPosition where - readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ] - ++ [ (VPosSuper , s') | ("super" , s') <- lexS ] - ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ] - where - lexS = lex s - signumToVPos n | n < 0 = VPosSub - | n > 0 = VPosSuper - | otherwise = VPosNormal - -data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace - deriving ( Eq, Show ) - -instance Lookupable UnderlineMode where - lookupTable = [ ( "continuous" , UnderlineModeNormal ) - , ( "skip-white-space" , UnderlineModeSkipWhitespace ) - ] - - -data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int - deriving ( Eq, Show ) - -data LengthOrPercent = LengthValueMM Int | PercentValue Int - deriving ( Eq, Show ) - -instance Default LengthOrPercent where - def = LengthValueMM 0 - -instance Read LengthOrPercent where - readsPrec _ s = - [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s] - ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s - , (unit , s'') <- reads s' - , let lengthMM = estimateInMillimeter - length' unit - ] - -data XslUnit = XslUnitMM | XslUnitCM - | XslUnitInch - | XslUnitPoints | XslUnitPica - | XslUnitPixel - | XslUnitEM - -instance Show XslUnit where - show XslUnitMM = "mm" - show XslUnitCM = "cm" - show XslUnitInch = "in" - show XslUnitPoints = "pt" - show XslUnitPica = "pc" - show XslUnitPixel = "px" - show XslUnitEM = "em" - -instance Read XslUnit where - readsPrec _ "mm" = [(XslUnitMM , "")] - readsPrec _ "cm" = [(XslUnitCM , "")] - readsPrec _ "in" = [(XslUnitInch , "")] - readsPrec _ "pt" = [(XslUnitPoints , "")] - readsPrec _ "pc" = [(XslUnitPica , "")] - readsPrec _ "px" = [(XslUnitPixel , "")] - readsPrec _ "em" = [(XslUnitEM , "")] - readsPrec _ _ = [] - --- | Rough conversion of measures into millimeters. --- Pixels and em's are actually implemetation dependant/relative measures, --- so I could not really easily calculate anything exact here even if I wanted. --- But I do not care about exactness right now, as I only use measures --- to determine if a paragraph is "indented" or not. -estimateInMillimeter :: Int -> XslUnit -> Int -estimateInMillimeter n XslUnitMM = n -estimateInMillimeter n XslUnitCM = n * 10 -estimateInMillimeter n XslUnitInch = n * 25 -- \* 25.4 -estimateInMillimeter n XslUnitPoints = n `div` 3 -- \* 1/72 * 25.4 -estimateInMillimeter n XslUnitPica = n * 4 -- \* 12 * 1/72 * 25.4 -estimateInMillimeter n XslUnitPixel = n `div`3 -- \* 1/72 * 25.4 -estimateInMillimeter n XslUnitEM = n * 7 -- \* 16 * 1/72 * 25.4 - - ----- --- List styles ----- - -type ListLevel = Int - -newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle - } - deriving ( Eq, Show ) - --- -getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle -getListLevelStyle level ListStyle{..} = - let (lower , exactHit , _) = M.splitLookup level levelStyles - in exactHit <|> fmap fst (M.maxView lower) - -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] - -- \^ simpler, but in general less efficient - -data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe String - , listItemSuffix :: Maybe String - , listItemFormat :: ListItemNumberFormat - , listItemStart :: Int - } - deriving ( Eq, Ord ) - -instance Show ListLevelStyle where - show ListLevelStyle{..} = "<LLS|" - ++ (show listLevelType) - ++ "|" - ++ (maybeToString listItemPrefix) - ++ (show listItemFormat) - ++ (maybeToString listItemSuffix) - ++ ">" - where maybeToString = fromMaybe "" - -data ListLevelType = LltBullet | LltImage | LltNumbered - deriving ( Eq, Ord, Show ) - -data ListItemNumberFormat = LinfNone - | LinfNumber - | LinfRomanLC | LinfRomanUC - | LinfAlphaLC | LinfAlphaUC - | LinfString String - deriving ( Eq, Ord ) - -instance Show ListItemNumberFormat where - show LinfNone = "" - show LinfNumber = "1" - show LinfRomanLC = "i" - show LinfRomanUC = "I" - show LinfAlphaLC = "a" - show LinfAlphaUC = "A" - show (LinfString s) = s - -instance Default ListItemNumberFormat where - def = LinfNone - -instance Read ListItemNumberFormat where - readsPrec _ "" = [(LinfNone , "")] - readsPrec _ "1" = [(LinfNumber , "")] - readsPrec _ "i" = [(LinfRomanLC , "")] - readsPrec _ "I" = [(LinfRomanUC , "")] - readsPrec _ "a" = [(LinfAlphaLC , "")] - readsPrec _ "A" = [(LinfAlphaUC , "")] - readsPrec _ s = [(LinfString s , "")] - --------------------------------------------------------------------------------- --- Readers --- --- ...it seems like a whole lot of this should be automatically deriveable --- or at least moveable into a class. Most of this is data concealed in --- code. --------------------------------------------------------------------------------- - --- -readAllStyles :: StyleReader _x Styles -readAllStyles = ( readFontPitches - >>?! ( readAutomaticStyles - &&& readStyles )) - >>?%? chooseMax - -- all top elements are always on the same hierarchy level - --- -readStyles :: StyleReader _x Styles -readStyles = executeIn NsOffice "styles" $ liftAsSuccess - $ liftA3 Styles - ( tryAll NsStyle "style" readStyle >>^ M.fromList ) - ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) - ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList ) - --- -readAutomaticStyles :: StyleReader _x Styles -readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess - $ liftA3 Styles - ( tryAll NsStyle "style" readStyle >>^ M.fromList ) - ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) - ( returnV M.empty ) - --- -readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties) -readDefaultStyle = lookupAttr NsStyle "family" - >>?! keepingTheValue readStyleProperties - --- -readStyle :: StyleReader _x (StyleName,Style) -readStyle = findAttr NsStyle "name" - >>?! keepingTheValue - ( liftA4 Style - ( lookupAttr' NsStyle "family" ) - ( findAttr' NsStyle "parent-style-name" ) - ( findAttr' NsStyle "list-style-name" ) - readStyleProperties - ) - --- -readStyleProperties :: StyleReaderSafe _x StyleProperties -readStyleProperties = liftA2 SProps - ( readTextProperties >>> choiceToMaybe ) - ( readParaProperties >>> choiceToMaybe ) - --- -readTextProperties :: StyleReader _x TextProperties -readTextProperties = - executeIn NsStyle "text-properties" $ liftAsSuccess - ( liftA6 PropT - ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) - ( searchAttr NsXSL_FO "font-weight" False isFontBold ) - ( findPitch ) - ( getAttr NsStyle "text-position" ) - ( readUnderlineMode ) - ( readStrikeThroughMode ) - ) - where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] - isFontBold = ("normal",False):("bold",True) - :(map ((,True).show) ([100,200..900]::[Int])) - -readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) -readUnderlineMode = readLineMode "text-underline-mode" - "text-underline-style" - -readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) -readStrikeThroughMode = readLineMode "text-line-through-mode" - "text-line-through-style" - -readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) -readLineMode modeAttr styleAttr = proc x -> do - isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x - mode <- lookupAttr' NsStyle modeAttr -< x - if isUL - then case mode of - Just m -> returnA -< Just m - Nothing -> returnA -< Just UnderlineModeNormal - else returnA -< Nothing - where - isLinePresent = [("none",False)] ++ map (,True) - [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" - , "long-dash" , "solid" , "wave" - ] - --- -readParaProperties :: StyleReader _x ParaProperties -readParaProperties = - executeIn NsStyle "paragraph-properties" $ liftAsSuccess - ( liftA3 PropP - ( liftA2 readNumbering - ( isSet' NsText "number-lines" ) - ( readAttr' NsText "line-number" ) - ) - ( liftA2 readIndentation - ( isSetWithDefault NsStyle "auto-text-indent" False ) - ( getAttr NsXSL_FO "text-indent" ) - ) - ( getAttr NsXSL_FO "margin-left" ) - ) - where readNumbering (Just True) (Just n) = NumberingRestart n - readNumbering (Just True) _ = NumberingKeep - readNumbering _ _ = NumberingNone - - readIndentation False indent = indent - readIndentation True _ = def - ----- --- List styles ----- - --- -readListStyle :: StyleReader _x (StyleName, ListStyle) -readListStyle = - findAttr NsStyle "name" - >>?! keepingTheValue - ( liftA ListStyle - $ ( liftA3 SM.union3 - ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) - ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) - ( readListLevelStyles NsText "list-level-style-image" LltImage ) - ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle - ) --- -readListLevelStyles :: Namespace -> ElementName - -> ListLevelType - -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) -readListLevelStyles namespace elementName levelType = - ( tryAll namespace elementName (readListLevelStyle levelType) - >>^ SM.fromList - ) - --- -readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) -readListLevelStyle levelType = readAttr NsText "level" - >>?! keepingTheValue - ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttr' NsText "start-value" ) - ) - where - toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) - toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b) - toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b) - startValue (Just "") = 1 - startValue (Just v) = if all isDigit v - then read v - else 1 - startValue Nothing = 1 - --- -chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle -chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing - | otherwise = Just ( F.foldr1 select ls ) - where - select ( ListLevelStyle t1 p1 s1 f1 b1 ) - ( ListLevelStyle t2 p2 s2 f2 _ ) - = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1 - select' LltNumbered _ = LltNumbered - select' _ LltNumbered = LltNumbered - select' _ _ = LltBullet - selectLinf LinfNone f2 = f2 - selectLinf f1 LinfNone = f1 - selectLinf (LinfString _) f2 = f2 - selectLinf f1 (LinfString _) = f1 - selectLinf f1 _ = f1 - - --------------------------------------------------------------------------------- --- Tools to access style data --------------------------------------------------------------------------------- - --- -lookupStyle :: StyleName -> Styles -> Maybe Style -lookupStyle name Styles{..} = M.lookup name stylesByName - --- -lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties -lookupDefaultStyle family Styles{..} = fromMaybe def - (M.lookup family defaultStyleMap) - --- -lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties -lookupDefaultStyle' Styles{..} family = fromMaybe def - (M.lookup family defaultStyleMap) - --- -getListStyle :: Style -> Styles -> Maybe ListStyle -getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) - --- -lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle -lookupListStyleByName name Styles{..} = M.lookup name listStylesByName - - --- | Returns a chain of parent of the current style. The direct parent will --- be the first element of the list, followed by its parent and so on. --- The current style is not in the list. -parents :: Style -> Styles -> [Style] -parents style styles = unfoldr findNextParent style -- Ha! - where findNextParent Style{..} - = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName - --- | Looks up the style family of the current style. Normally, every style --- should have one. But if not, all parents are searched. -getStyleFamily :: Style -> Styles -> Maybe StyleFamily -getStyleFamily style@Style{..} styles - = styleFamily - <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles) - --- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property --- values are specified. Instead, a value might be inherited from a --- parent style. This function makes this chain of inheritance --- concrete and easily accessible by encapsulating the necessary lookups. --- The resulting list contains the direct properties of the style as the first --- element, the ones of the direct parent element as the next one, and so on. --- --- Note: There should also be default properties for each style family. These --- are @not@ contained in this list because properties inherited from --- parent elements take precedence over default styles. --- --- This function is primarily meant to be used through convenience wrappers. --- -stylePropertyChain :: Style -> Styles -> [StyleProperties] -stylePropertyChain style styles - = map styleProperties (style : parents style styles) - --- -extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] -extendedStylePropertyChain [] _ = [] -extendedStylePropertyChain [style] styles = (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) -extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) - ++ (extendedStylePropertyChain trace styles) --- Optimizable with Data.Sequence - --- -extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] -extendedStylePropertyChain' [] _ = Nothing -extendedStylePropertyChain' [style] styles = Just ( - (stylePropertyChain style styles) - ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) - ) -extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) - (extendedStylePropertyChain' trace styles) - --- -stylePropertyChain' :: Styles -> Style -> [StyleProperties] -stylePropertyChain' = flip stylePropertyChain - --- -getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] -getStylePropertyChain name styles = maybe [] - (`stylePropertyChain` styles) - (lookupStyle name styles) - --- -getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] -getPropertyChain extract style styles = catMaybes - $ map extract - $ stylePropertyChain style styles - --- -textPropertyChain :: Style -> Styles -> [TextProperties] -textPropertyChain = getPropertyChain textProperties - --- -paraPropertyChain :: Style -> Styles -> [ParaProperties] -paraPropertyChain = getPropertyChain paraProperties - --- -getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a -getTextProperty extract style styles = fmap extract - $ listToMaybe - $ textPropertyChain style styles - --- -getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a -getTextProperty' extract style styles = F.asum - $ map extract - $ textPropertyChain style styles - --- -getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a -getParaProperty extract style styles = fmap extract - $ listToMaybe - $ paraPropertyChain style styles - --- | Lifts the reader into another readers' state. -liftStyles :: (OdtConverterState s -> OdtConverterState Styles) - -> (OdtConverterState Styles -> OdtConverterState s ) - -> XMLReader s x x -liftStyles extract inject = switchState extract inject - $ convertingExtraState M.empty readAllStyles - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs deleted file mode 100644 index c8dbbf45a..000000000 --- a/src/Text/Pandoc/Readers/Org.hs +++ /dev/null @@ -1,62 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Conversion of org-mode formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Org ( readOrg ) where - -import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) -import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) -import Text.Pandoc.Readers.Org.ParserState ( optionsToParserState ) - -import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Error -import Text.Pandoc.Options - -import Control.Monad.Except ( throwError ) -import Control.Monad.Reader ( runReaderT ) - - --- | Parse org-mode string and return a Pandoc document. -readOrg :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readOrg opts s = do - parsed <- flip runReaderT def $ - readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n") - case parsed of - Right result -> return result - Left _ -> throwError $ PandocParseError "problem parsing org" - --- --- Parser --- -parseOrg :: PandocMonad m => OrgParser m Pandoc -parseOrg = do - blocks' <- blockList - meta' <- meta - return $ Pandoc meta' blocks' diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs deleted file mode 100644 index 5588c4552..000000000 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ /dev/null @@ -1,137 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode inline elements. --} -module Text.Pandoc.Readers.Org.BlockStarts - ( exampleLineStart - , hline - , noteMarker - , tableStart - , drawerStart - , headerStart - , metaLineStart - , latexEnvStart - , commentLineStart - , bulletListStart - , orderedListStart - , endOfBlock - ) where - -import Control.Monad ( void ) -import Text.Pandoc.Readers.Org.Parsing - --- | Horizontal Line (five -- dashes or more) -hline :: Monad m => OrgParser m () -hline = try $ do - skipSpaces - string "-----" - many (char '-') - skipSpaces - newline - return () - --- | Read the start of a header line, return the header level -headerStart :: Monad m => OrgParser m Int -headerStart = try $ - (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos - -tableStart :: Monad m => OrgParser m Char -tableStart = try $ skipSpaces *> char '|' - -latexEnvStart :: Monad m => OrgParser m String -latexEnvStart = try $ do - skipSpaces *> string "\\begin{" - *> latexEnvName - <* string "}" - <* blankline - where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") - - --- | Parses bullet list marker. -bulletListStart :: Monad m => OrgParser m () -bulletListStart = try $ - choice - [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1 - , () <$ skipSpaces1 <* char '*' <* skipSpaces1 - ] - -genericListStart :: Monad m - => OrgParser m String - -> OrgParser m Int -genericListStart listMarker = try $ - (+) <$> (length <$> many spaceChar) - <*> (length <$> listMarker <* many1 spaceChar) - -orderedListStart :: Monad m => OrgParser m Int -orderedListStart = genericListStart orderedListMarker - -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") - -drawerStart :: Monad m => OrgParser m String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') - -metaLineStart :: Monad m => OrgParser m () -metaLineStart = try $ skipSpaces <* string "#+" - -commentLineStart :: Monad m => OrgParser m () -commentLineStart = try $ skipSpaces <* string "# " - -exampleLineStart :: Monad m => OrgParser m () -exampleLineStart = () <$ try (skipSpaces *> string ": ") - -noteMarker :: Monad m => OrgParser m String -noteMarker = try $ do - char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') - ] - --- | Succeeds if the parser is at the end of a block. -endOfBlock :: Monad m => OrgParser m () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart - where - -- Succeeds if there is a new block starting at this position. - anyBlockStart :: Monad m => OrgParser m () - anyBlockStart = try . choice $ - [ exampleLineStart - , hline - , metaLineStart - , commentLineStart - , void noteMarker - , void tableStart - , void drawerStart - , void headerStart - , void latexEnvStart - , void bulletListStart - , void orderedListStart - ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs deleted file mode 100644 index 78ac8d0d1..000000000 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ /dev/null @@ -1,979 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} -{- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2017 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode block elements. --} -module Text.Pandoc.Readers.Org.Blocks - ( blockList - , meta - ) where - -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks ) -import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead ) - -import Control.Monad ( foldM, guard, mzero, void ) -import Data.Char ( isSpace, toLower, toUpper) -import Data.Default ( Default ) -import Data.List ( foldl', isPrefixOf ) -import Data.Maybe ( fromMaybe, isNothing ) -import Data.Monoid ((<>)) - --- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: PandocMonad m => Int -> OrgParser m (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: Monad m => OrgParser m () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: Monad m => OrgParser m [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: Monad m => OrgParser m TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - - --- --- parsing blocks --- - --- | Get a list of blocks. -blockList :: PandocMonad m => OrgParser m [Block] -blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof - st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks - --- | Get the meta information safed in the state. -meta :: Monad m => OrgParser m Meta -meta = do - meta' <- metaExport - runF meta' <$> getState - -blocks :: PandocMonad m => OrgParser m (F Blocks) -blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) - -block :: PandocMonad m => OrgParser m (F Blocks) -block = choice [ mempty <$ blanklines - , table - , orgBlock - , figure - , example - , genericDrawer - , specialLine - , horizontalRule - , list - , latexFragment - , noteBlock - , paraOrPlain - ] <?> "block" - - --- --- Block Attributes --- - --- | Attributes that may be added to figures (like a name or caption). -data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String - , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] - } - --- | Convert BlockAttributes into pandoc Attr -attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = - let - ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues - classes = case lookup "class" blockAttrKeyValues of - Nothing -> [] - Just clsStr -> words clsStr - kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues - in (ident, classes, kv) - -stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) -stringyMetaAttribute attrCheck = try $ do - metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName - skipSpaces - attrValue <- anyLine - return (attrName, attrValue) - -blockAttributes :: PandocMonad m => OrgParser m BlockAttributes -blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) - let caption = foldl' (appendValues "CAPTION") Nothing kv - let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv - let name = lookup "NAME" kv - let label = lookup "LABEL" kv - caption' <- case caption of - Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes - { blockAttrName = name - , blockAttrLabel = label - , blockAttrCaption = caption' - , blockAttrKeyValues = kvAttrs' - } - where - attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False - - appendValues :: String -> Maybe String -> (String, String) -> Maybe String - appendValues attrName accValue (key, value) = - if key /= attrName - then accValue - else case accValue of - Just acc -> Just $ acc ++ ' ':value - Nothing -> Just value - -keyValues :: Monad m => OrgParser m [(String, String)] -keyValues = try $ - manyTill ((,) <$> key <*> value) newline - where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue - - endOfValue :: Monad m => OrgParser m () - endOfValue = - lookAhead $ (() <$ try (many1 spaceChar <* key)) - <|> () <$ newline - - --- --- Org Blocks (#+BEGIN_... / #+END_...) --- - --- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. -orgBlock :: PandocMonad m => OrgParser m (F Blocks) -orgBlock = try $ do - blockAttrs <- blockAttributes - blkType <- blockHeaderStart - ($ blkType) $ - case (map toLower blkType) of - "export" -> exportBlock - "comment" -> rawBlockLines (const mempty) - "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "example" -> rawBlockLines (return . exampleCode) - "quote" -> parseBlockLines (fmap B.blockQuote) - "verse" -> verseBlock - "src" -> codeBlock blockAttrs - _ -> parseBlockLines $ - let (ident, classes, kv) = attrFromBlockAttributes blockAttrs - in fmap $ B.divWith (ident, classes ++ [blkType], kv) - where - blockHeaderStart :: Monad m => OrgParser m String - blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - - lowercase :: String -> String - lowercase = map toLower - -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) - -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) - where - parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) - parsedBlockContent = try $ do - raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") - --- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String -rawBlockContent blockType = try $ do - blkLines <- manyTill rawLine blockEnder - tabLen <- getOption readerTabStop - return - . unlines - . stripIndent - . map (tabsToSpaces tabLen . commaEscaped) - $ blkLines - where - rawLine :: Monad m => OrgParser m String - rawLine = try $ ("" <$ blankline) <|> anyLine - - blockEnder :: Monad m => OrgParser m () - blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - - stripIndent :: [String] -> [String] - stripIndent strs = map (drop (shortestIndent strs)) strs - - shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs - --- | Read but ignore all remaining block headers. -ignHeaders :: Monad m => OrgParser m () -ignHeaders = (() <$ newline) <|> (() <$ anyLine) - --- | Read a block containing code intended for export in specific backends --- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) -exportBlock blockType = try $ do - exportType <- skipSpaces *> orgArgWord <* ignHeaders - contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) - -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) -verseBlock blockType = try $ do - ignHeaders - content <- rawBlockContent blockType - fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) - where - -- replace initial spaces with nonbreaking spaces to preserve - -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) - parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces - then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") - return (trimInlinesF $ pure nbspIndent <> line) - --- | Read a code block and the associated results block if present. Which of --- boths blocks is included in the output is determined using the "exports" --- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) -codeBlock blockAttrs blockType = do - skipSpaces - (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock - let id' = fromMaybe mempty $ blockAttrName blockAttrs - let includeCode = exportsCode kv - let includeResults = exportsResults kv - let codeBlck = B.codeBlockWith ( id', classes, kv ) content - let labelledBlck = maybe (pure codeBlck) - (labelDiv codeBlck) - (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent - return $ - (if includeCode then labelledBlck else mempty) <> - (if includeResults then resultBlck else mempty) - where - labelDiv :: Blocks -> F Inlines -> F Blocks - labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) - - labelledBlock :: F Inlines -> F Blocks - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) - -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs - -trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do - blanklines - stringAnyCase "#+RESULTS:" - blankline - block - --- | Parse code block arguments --- TODO: We currently don't handle switches. -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) -codeHeaderArgs = try $ do - language <- skipSpaces *> orgArgWord - _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) - parameters <- manyTill blockOption newline - let pandocLang = translateLang language - return $ - if hasRundocParameters parameters - then ( [ pandocLang, rundocBlockClass ] - , map toRundocAttrib (("language", language) : parameters) - ) - else ([ pandocLang ], parameters) - where - hasRundocParameters = not . null - -switch :: Monad m => OrgParser m (Char, Maybe String) -switch = try $ simpleSwitch <|> lineNumbersSwitch - where - simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) - lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> - (string "-l \"" *> many1Till nonspaceChar (char '"')) - -blockOption :: Monad m => OrgParser m (String, String) -blockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgParamValue - return (argKey, paramValue) - -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ - skipSpaces - *> notFollowedBy (char ':' ) - *> many1 nonspaceChar - <* skipSpaces - -horizontalRule :: Monad m => OrgParser m (F Blocks) -horizontalRule = return B.horizontalRule <$ try hline - - --- --- Drawers --- - --- | A generic drawer which has no special meaning for org-mode. --- Whether or not this drawer is included in the output depends on the drawers --- export setting. -genericDrawer :: PandocMonad m => OrgParser m (F Blocks) -genericDrawer = try $ do - name <- map toUpper <$> drawerStart - content <- manyTill drawerLine (try drawerEnd) - state <- getState - -- Include drawer if it is explicitly included in or not explicitly excluded - -- from the list of drawers that should be exported. PROPERTIES drawers are - -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of - _ | name == "PROPERTIES" -> return mempty - Left names | name `elem` names -> return mempty - Right names | name `notElem` names -> return mempty - _ -> drawerDiv name <$> parseLines content - where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines - - drawerDiv :: String -> F Blocks -> F Blocks - drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) - -drawerLine :: Monad m => OrgParser m String -drawerLine = anyLine - -drawerEnd :: Monad m => OrgParser m String -drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline - --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: Monad m => OrgParser m Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: Monad m => OrgParser m (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: Monad m => OrgParser m PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: Monad m => OrgParser m PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - - --- --- Figures --- - --- | Figures or an image paragraph (i.e. an image on a line by itself). Only --- images with a caption attribute are interpreted as figures. -figure :: PandocMonad m => OrgParser m (F Blocks) -figure = try $ do - figAttrs <- blockAttributes - src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of - Nothing -> mzero - Just imgSrc -> do - guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs - return $ imageBlock isFigure figAttrs imgSrc - where - selfTarget :: PandocMonad m => OrgParser m String - selfTarget = try $ char '[' *> linkTarget <* char ']' - - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks - imageBlock isFigure figAttrs imgSrc = - let - figName = fromMaybe mempty $ blockAttrName figAttrs - figLabel = fromMaybe mempty $ blockAttrLabel figAttrs - figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - figKeyVals = blockAttrKeyValues figAttrs - attr = (figLabel, mempty, figKeyVals) - figTitle = (if isFigure then withFigPrefix else id) figName - in - B.para . B.imageWith attr imgSrc figTitle <$> figCaption - - withFigPrefix :: String -> String - withFigPrefix cs = - if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - --- | Succeeds if looking at the end of the current paragraph -endOfParagraph :: Monad m => OrgParser m () -endOfParagraph = try $ skipSpaces *> newline *> endOfBlock - - --- --- Examples --- - --- | Example code marked up by a leading colon. -example :: Monad m => OrgParser m (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine - where - exampleLine :: Monad m => OrgParser m String - exampleLine = try $ exampleLineStart *> anyLine - -exampleCode :: String -> Blocks -exampleCode = B.codeBlockWith ("", ["example"], []) - - --- --- Comments, Options and Metadata --- - -specialLine :: PandocMonad m => OrgParser m (F Blocks) -specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine - -rawExportLine :: PandocMonad m => OrgParser m Blocks -rawExportLine = try $ do - metaLineStart - key <- metaKey - if key `elem` ["latex", "html", "texinfo", "beamer"] - then B.rawBlock key <$> anyLine - else mzero - -commentLine :: Monad m => OrgParser m Blocks -commentLine = commentLineStart *> anyLine *> pure mempty - - --- --- Tables --- -data ColumnProperty = ColumnProperty - { columnAlignment :: Maybe Alignment - , columnRelWidth :: Maybe Int - } deriving (Show, Eq) - -instance Default ColumnProperty where - def = ColumnProperty Nothing Nothing - -data OrgTableRow = OrgContentRow (F [Blocks]) - | OrgAlignRow [ColumnProperty] - | OrgHlineRow - --- OrgTable is strongly related to the pandoc table ADT. Using the same --- (i.e. pandoc-global) ADT would mean that the reader would break if the --- global structure was to be changed, which would be bad. The final table --- should be generated using a builder function. -data OrgTable = OrgTable - { orgTableColumnProperties :: [ColumnProperty] - , orgTableHeader :: [Blocks] - , orgTableRows :: [[Blocks]] - } - -table :: PandocMonad m => OrgParser m (F Blocks) -table = try $ do - blockAttrs <- blockAttributes - lookAhead tableStart - do - rows <- tableRows - let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs - return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows - -orgToPandocTable :: OrgTable - -> Inlines - -> Blocks -orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) - then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps - else Nothing - in B.table caption (map (convertColProp totalWidth) colProps) heads lns - where - convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double) - convertColProp totalWidth colProp = - let - align' = fromMaybe AlignDefault $ columnAlignment colProp - width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) - <*> totalWidth - in (align', width') - -tableRows :: PandocMonad m => OrgParser m [OrgTableRow] -tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) - -tableContentRow :: PandocMonad m => OrgParser m OrgTableRow -tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) - -tableContentCell :: PandocMonad m => OrgParser m (F Blocks) -tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell - -tableAlignRow :: Monad m => OrgParser m OrgTableRow -tableAlignRow = try $ do - tableStart - colProps <- many1Till columnPropertyCell newline - -- Empty rows are regular (i.e. content) rows, not alignment rows. - guard $ any (/= def) colProps - return $ OrgAlignRow colProps - -columnPropertyCell :: Monad m => OrgParser m ColumnProperty -columnPropertyCell = emptyCell <|> propCell <?> "alignment info" - where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) - propCell = try $ ColumnProperty - <$> (skipSpaces - *> char '<' - *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) - <* char '>' - <* emptyCell) - -tableAlignFromChar :: Monad m => OrgParser m Alignment -tableAlignFromChar = try $ - choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] - -tableHline :: Monad m => OrgParser m OrgTableRow -tableHline = try $ - OrgHlineRow <$ (tableStart *> char '-' *> anyLine) - -endOfCell :: Monad m => OrgParser m Char -endOfCell = try $ char '|' <|> lookAhead newline - -rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM rowToContent emptyTable - where emptyTable = OrgTable mempty mempty mempty - -normalizeTable :: OrgTable -> OrgTable -normalizeTable (OrgTable colProps heads rows) = - OrgTable colProps' heads rows - where - refRow = if heads /= mempty - then heads - else case rows of - (r:_) -> r - _ -> mempty - cols = length refRow - fillColumns base padding = take cols $ base ++ repeat padding - colProps' = fillColumns colProps def - --- One or more horizontal rules after the first content line mark the previous --- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTable - -> OrgTableRow - -> F OrgTable -rowToContent orgTable row = - case row of - OrgHlineRow -> return singleRowPromotedToHeader - OrgAlignRow props -> return . setProperties $ props - OrgContentRow cs -> appendToBody cs - where - singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable - - setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } - - appendToBody :: F [Blocks] -> F OrgTable - appendToBody frow = do - newRow <- frow - let oldRows = orgTableRows orgTable - -- NOTE: This is an inefficient O(n) operation. This should be changed - -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } - - --- --- LaTeX fragments --- -latexFragment :: Monad m => OrgParser m (F Blocks) -latexFragment = try $ do - envName <- latexEnvStart - content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) - where - c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" - , c - , "\\end{", e, "}\n" - ] - -latexEnd :: Monad m => String -> OrgParser m () -latexEnd envName = try $ - () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") - <* blankline - - --- --- Footnote defintions --- -noteBlock :: PandocMonad m => OrgParser m (F Blocks) -noteBlock = try $ do - ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillHeaderOrNote - addToNotesTable (ref, content) - return mempty - where - blocksTillHeaderOrNote = - many1Till block (eof <|> () <$ lookAhead noteMarker - <|> () <$ lookAhead headerStart) - --- Paragraphs or Plain text -paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) -paraOrPlain = try $ do - -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) - ils <- inlines - nl <- option False (newline *> return True) - -- Read block as paragraph, except if we are in a list context and the block - -- is directly followed by a list item, in which case the block is read as - -- plain text. - try (guard nl - *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) - *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) - - --- --- list blocks --- - -list :: PandocMonad m => OrgParser m (F Blocks) -list = choice [ definitionList, bulletList, orderedList ] <?> "list" - -definitionList :: PandocMonad m => OrgParser m (F Blocks) -definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactifyDL . sequence - <$> many1 (definitionListItem $ bulletListStart' (Just n)) - -bulletList :: PandocMonad m => OrgParser m (F Blocks) -bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify . sequence - <$> many1 (listItem (bulletListStart' $ Just n)) - -orderedList :: PandocMonad m => OrgParser m (F Blocks) -orderedList = fmap B.orderedList . fmap compactify . sequence - <$> many1 (listItem orderedListStart) - -bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int --- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- length <$> many spaceChar - oneOf (bullets $ ind == 0) - skipSpaces1 - return (ind + 1) -bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf (bullets $ n == 1) - many1 spaceChar - return n - --- Unindented lists are legal, but they can't use '*' bullets. --- We return n to maintain compatibility with the generic listItem. -bullets :: Bool -> String -bullets unindented = if unindented then "+-" else "*+-" - -definitionListItem :: PandocMonad m - => OrgParser m Int - -> OrgParser m (F (Inlines, [Blocks])) -definitionListItem parseMarkerGetLength = try $ do - markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try definitionMarker) - line1 <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' - where - definitionMarker = - spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) - - --- parse raw text for one list item, excluding start marker and continuations -listItem :: PandocMonad m - => OrgParser m Int - -> OrgParser m (F Blocks) -listItem start = try . withContext ListItemState $ do - markerLength <- try start - firstLine <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest - --- continuation of a list item - indented and separated by blankline or endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int - -> OrgParser m String -listContinuation markerLength = try $ - notFollowedBy' blankline - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) - where - listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Monad m => Int -> OrgParser m String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: Monad m => OrgParser m String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs deleted file mode 100644 index 391877c03..000000000 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ /dev/null @@ -1,172 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode export options. --} -module Text.Pandoc.Readers.Org.ExportSettings - ( exportSettings - ) where - -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing - -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.Maybe ( listToMaybe ) - --- | Read and handle space separated org-mode export settings. -exportSettings :: Monad m => OrgParser m () -exportSettings = void $ sepBy spaces exportSetting - --- | Setter function for export settings. -type ExportSettingSetter a = a -> ExportSettings -> ExportSettings - --- | Read and process a single org-mode export option. -exportSetting :: Monad m => OrgParser m () -exportSetting = choice - [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val }) - , booleanSetting "'" (\val es -> es { exportSmartQuotes = val }) - , booleanSetting "*" (\val es -> es { exportEmphasizedText = val }) - , booleanSetting "-" (\val es -> es { exportSpecialStrings = val }) - , ignoredSetting ":" - , ignoredSetting "<" - , ignoredSetting "\\n" - , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) - , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) - , ignoredSetting "c" - -- org-mode allows the special value `comment` for creator, which we'll - -- interpret as true as it doesn't make sense in the context of Pandoc. - , booleanSetting "creator" (\val es -> es { exportWithCreator = val }) - , complementableListSetting "d" (\val es -> es { exportDrawers = val }) - , ignoredSetting "date" - , ignoredSetting "e" - , booleanSetting "email" (\val es -> es { exportWithEmail = val }) - , ignoredSetting "f" - , integerSetting "H" (\val es -> es { exportHeadlineLevels = val }) - , ignoredSetting "inline" - , ignoredSetting "num" - , ignoredSetting "p" - , ignoredSetting "pri" - , ignoredSetting "prop" - , ignoredSetting "stat" - , ignoredSetting "tags" - , ignoredSetting "tasks" - , ignoredSetting "tex" - , ignoredSetting "timestamp" - , ignoredSetting "title" - , ignoredSetting "toc" - , booleanSetting "todo" (\val es -> es { exportWithTodoKeywords = val }) - , ignoredSetting "|" - ] <?> "export setting" - -genericExportSetting :: Monad m - => OrgParser m a - -> String - -> ExportSettingSetter a - -> OrgParser m () -genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' - value <- optionParser - updateState $ modifyExportSettings value - where - modifyExportSettings val st = - st { orgStateExportSettings = setter val . orgStateExportSettings $ st } - --- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () -booleanSetting = genericExportSetting elispBoolean - --- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () -integerSetting = genericExportSetting parseInt - where - parseInt = try $ - many1 digit >>= maybe mzero (return . fst) . listToMaybe . reads - --- | Either the string "headline" or an elisp boolean and treated as an --- @ArchivedTreesOption@. -archivedTreeSetting :: Monad m - => String - -> ExportSettingSetter ArchivedTreesOption - -> OrgParser m () -archivedTreeSetting = - genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean - where - archivedTreesHeadlineSetting = try $ do - _ <- string "headline" - lookAhead (newline <|> spaceChar) - return ArchivedTreesHeadlineOnly - - archivedTreesBoolean = try $ do - exportBool <- elispBoolean - return $ - if exportBool - then ArchivedTreesExport - else ArchivedTreesNoExport - --- | A list or a complement list (i.e. a list starting with `not`). -complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) - -> OrgParser m () -complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList - , Right <$> stringList - , (\b -> if b then Left [] else Right []) <$> elispBoolean - ] - where - -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] - stringList = try $ - char '(' - *> sepBy elispString spaces - <* char ')' - - -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ - string "(not " - *> sepBy elispString spaces - <* char ')' - - elispString :: Monad m => OrgParser m String - elispString = try $ - char '"' - *> manyTill alphaNum (char '"') - --- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) - --- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are --- interpreted as true. -elispBoolean :: Monad m => OrgParser m Bool -elispBoolean = try $ do - value <- many1 nonspaceChar - return $ case map toLower value of - "nil" -> False - "{}" -> False - "()" -> False - _ -> True diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs deleted file mode 100644 index f3671641a..000000000 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ /dev/null @@ -1,880 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode inline elements. --} -module Text.Pandoc.Readers.Org.Inlines - ( inline - , inlines - , addToNotesTable - , linkTarget - ) where - -import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) -import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) -import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Text.Pandoc.Class (PandocMonad) - -import Prelude hiding (sequence) -import Control.Monad ( guard, mplus, mzero, when, void ) -import Control.Monad.Trans ( lift ) -import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse ) -import Data.Maybe ( fromMaybe ) -import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Data.Traversable (sequence) - --- --- Functions acting on the parser state --- -recordAnchorId :: PandocMonad m => String -> OrgParser m () -recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } - -pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () -pushToInlineCharStack c = updateState $ \s -> - s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } - -popInlineCharStack :: PandocMonad m => OrgParser m () -popInlineCharStack = updateState $ \s -> - s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } - -surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] -surroundingEmphasisChar = - take 1 . drop 1 . orgStateEmphasisCharStack <$> getState - -startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () -startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Just maxNewlines } - -decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () -decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } - -newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool -newlinesCountWithinLimits = do - st <- getState - return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True - -resetEmphasisNewlines :: PandocMonad m => OrgParser m () -resetEmphasisNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Nothing } - -addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () -addToNotesTable note = do - oldnotes <- orgStateNotes' <$> getState - updateState $ \s -> s{ orgStateNotes' = note:oldnotes } - --- | Parse a single Org-mode inline element -inline :: PandocMonad m => OrgParser m (F Inlines) -inline = - choice [ whitespace - , linebreak - , cite - , footnote - , linkOrImage - , anchor - , inlineCodeBlock - , str - , endline - , emphasizedText - , code - , math - , displayMath - , verbatim - , subscript - , superscript - , inlineLaTeX - , exportSnippet - , smart - , symbol - ] <* (guard =<< newlinesCountWithinLimits) - <?> "inline" - --- | Read the rest of the input as inlines. -inlines :: PandocMonad m => OrgParser m (F Inlines) -inlines = trimInlinesF . mconcat <$> many1 inline - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" - - -whitespace :: PandocMonad m => OrgParser m (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - <?> "whitespace" - -linebreak :: PandocMonad m => OrgParser m (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline - -str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - <* updateLastStrPos - --- | An endline character that can be treated as a space, not a structural --- break. This should reflect the values of the Emacs variable --- @org-element-pagaraph-separate@. -endline :: PandocMonad m => OrgParser m (F Inlines) -endline = try $ do - newline - notFollowedBy' endOfBlock - decEmphasisNewlinesCount - guard =<< newlinesCountWithinLimits - updateLastPreCharPos - return . return $ B.softbreak - - --- --- Citations --- - --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. - -cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do - guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite - , orgRefCite - , berkeleyTextualCite - ] - return $ (flip B.cite (B.text raw)) <$> cs - --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' - -orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -orgRefCite = try $ choice - [ normalOrgRefCite - , fmap (:[]) <$> linkLikeOrgRefCite - ] - -normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -normalOrgRefCite = try $ do - mode <- orgRefCiteMode - firstCitation <- orgRefCiteList mode - moreCitations <- many (try $ char ',' *> orgRefCiteList mode) - return . sequence $ firstCitation : moreCitations - where - -- | A list of org-ref style citation keys, parsed as citation of the given - -- citation mode. - orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) - orgRefCiteList citeMode = try $ do - key <- orgRefCiteKey - returnF $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = citeMode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' citeKey - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair '[' ']' berkeleyTextualCite - --- | Read a link-like org-ref style citation. The citation includes pre and --- post text. However, multiple citations are not possible due to limitations --- in the syntax. -linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) -linkLikeOrgRefCite = try $ do - _ <- string "[[" - mode <- orgRefCiteMode - key <- orgRefCiteKey - _ <- string "][" - pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::") - spc <- option False (True <$ spaceChar) - suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]") - return $ do - pre' <- pre - suf' <- suf - return Citation - { citationId = key - , citationPrefix = B.toList pre' - , citationSuffix = B.toList (if spc then B.space <> suf' else suf') - , citationMode = mode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read a citation key. The characters allowed in citation keys are taken --- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) - --- | Supported citation types. Only a small subset of org-ref types is --- supported for now. TODO: rewrite this, use LaTeX reader as template. -orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode -orgRefCiteMode = - choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) - [ ("cite", AuthorInText) - , ("citep", NormalCitation) - , ("citep*", NormalCitation) - , ("citet", AuthorInText) - , ("citet*", AuthorInText) - , ("citeyear", SuppressAuthor) - ] - -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey - suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - -footnote :: PandocMonad m => OrgParser m (F Inlines) -footnote = try $ inlineNote <|> referencedNote - -inlineNote :: PandocMonad m => OrgParser m (F Inlines) -inlineNote = try $ do - string "[fn:" - ref <- many alphaNum - char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ - addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note - -referencedNote :: PandocMonad m => OrgParser m (F Inlines) -referencedNote = try $ do - ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' - case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' - -linkOrImage :: PandocMonad m => OrgParser m (F Inlines) -linkOrImage = explicitOrImageLink - <|> selflinkOrImage - <|> angleLink - <|> plainLink - <?> "link or image" - -explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) -explicitOrImageLink = try $ do - char '[' - srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget - title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat <$> many inline) title - char ']' - return $ do - src <- srcF - case cleanLinkString title of - Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty - _ -> - linkToInlinesF src =<< title' - -selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) -selflinkOrImage = try $ do - src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) - -plainLink :: PandocMonad m => OrgParser m (F Inlines) -plainLink = try $ do - (orig, src) <- uri - returnF $ B.link src "" (B.str orig) - -angleLink :: PandocMonad m => OrgParser m (F Inlines) -angleLink = try $ do - char '<' - link <- plainLink - char '>' - return link - -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") - -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String -possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") - -applyCustomLinkFormat :: String -> OrgParser m (F String) -applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter - --- | Take a link and return a function which produces new inlines when given --- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - (Just cleanedLink) -> if isImageFilename cleanedLink - then const . pure $ B.image cleanedLink "" "" - else pure . B.link cleanedLink "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines -internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB - then return $ B.link ('#':link) "" title - else return $ B.emph title - --- | Parse an anchor like @<<anchor-id>>@ and return an empty span with --- @anchor-id@ set as id. Legal anchors in org-mode are defined through --- @org-target-regexp@, which is fairly liberal. Since no link is created if --- @anchor-id@ contains spaces, we are more restrictive in what is accepted as --- an anchor. - -anchor :: PandocMonad m => OrgParser m (F Inlines) -anchor = try $ do - anchorId <- parseAnchor - recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty - where - parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - <* skipSpaces - --- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors --- the org function @org-export-solidify-link-text@. - -solidify :: String -> String -solidify = map replaceSpecialChar - where replaceSpecialChar c - | isAlphaNum c = c - | c `elem` ("_.-:" :: String) = c - | otherwise = '-' - --- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) -inlineCodeBlock = try $ do - string "src_" - lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") - let attrClasses = [translateLang lang, rundocBlockClass] - let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) - inlineBlockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgInlineParamValue - return (argKey, paramValue) - - orgInlineParamValue :: PandocMonad m => OrgParser m String - orgInlineParamValue = try $ - skipSpaces - *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") - <* skipSpaces - - -emphasizedText :: PandocMonad m => OrgParser m (F Inlines) -emphasizedText = do - state <- getState - guard . exportEmphasizedText . orgStateExportSettings $ state - try $ choice - [ emph - , strong - , strikeout - , underline - ] - -enclosedByPair :: PandocMonad m - => Char -- ^ opening char - -> Char -- ^ closing char - -> OrgParser m a -- ^ parser - -> OrgParser m [a] -enclosedByPair s e p = char s *> many1Till p (char e) - -emph :: PandocMonad m => OrgParser m (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' - -strong :: PandocMonad m => OrgParser m (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' - -strikeout :: PandocMonad m => OrgParser m (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' - --- There is no underline, so we use strong instead. -underline :: PandocMonad m => OrgParser m (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' - -verbatim :: PandocMonad m => OrgParser m (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' - -code :: PandocMonad m => OrgParser m (F Inlines) -code = return . B.code <$> verbatimBetween '~' - -subscript :: PandocMonad m => OrgParser m (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) - -superscript :: PandocMonad m => OrgParser m (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) - -math :: PandocMonad m => OrgParser m (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' - , rawMathBetween "\\(" "\\)" - ] - -displayMath :: PandocMonad m => OrgParser m (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] - -updatePositions :: PandocMonad m - => Char - -> OrgParser m Char -updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c - -symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - -emphasisBetween :: PandocMonad m - => Char - -> OrgParser m (F Inlines) -emphasisBetween c = try $ do - startEmphasisNewlinesCounting emphasisAllowedNewlines - res <- enclosedInlines (emphasisStart c) (emphasisEnd c) - isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState - when isTopLevelEmphasis - resetEmphasisNewlines - return res - -verbatimBetween :: PandocMonad m - => Char - -> OrgParser m String -verbatimBetween c = try $ - emphasisStart c *> - many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) - where - verbatimChar = noneOf "\n\r" >>= updatePositions - --- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m - => Char - -> OrgParser m String -mathStringBetween c = try $ do - mathStart c - body <- many1TillNOrLessNewlines mathAllowedNewlines - (noneOf (c:"\n\r")) - (lookAhead $ mathEnd c) - final <- mathEnd c - return $ body ++ [final] - --- | Parse a single character between @c@ using math rules -math1CharBetween :: PandocMonad m - => Char - -> OrgParser m String -math1CharBetween c = try $ do - char c - res <- noneOf $ c:mathForbiddenBorderChars - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] - -rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) - --- | Parses the start (opening character) of emphasis -emphasisStart :: PandocMonad m => Char -> OrgParser m Char -emphasisStart c = try $ do - guard =<< afterEmphasisPreChar - guard =<< notAfterString - char c - lookAhead (noneOf emphasisForbiddenBorderChars) - pushToInlineCharStack c - -- nested inlines are allowed, so mark this position as one which might be - -- followed by another inline. - updateLastPreCharPos - return c - --- | Parses the closing character of emphasis -emphasisEnd :: PandocMonad m => Char -> OrgParser m Char -emphasisEnd c = try $ do - guard =<< notAfterForbiddenBorderChar - char c - eof <|> () <$ lookAhead acceptablePostChars - updateLastStrPos - popInlineCharStack - return c - where acceptablePostChars = - surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) - -mathStart :: PandocMonad m => Char -> OrgParser m Char -mathStart c = try $ - char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) - -mathEnd :: PandocMonad m => Char -> OrgParser m Char -mathEnd c = try $ do - res <- noneOf (c:mathForbiddenBorderChars) - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return res - - -enclosedInlines :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m (F Inlines) -enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline - -enclosedRaw :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m String -enclosedRaw start end = try $ - start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end - spanningTwoLines = try $ - anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine - --- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume --- newlines. -many1TillNOrLessNewlines :: PandocMonad m => Int - -> OrgParser m Char - -> OrgParser m a - -> OrgParser m String -many1TillNOrLessNewlines n p end = try $ - nMoreLines (Just n) mempty >>= oneOrMore - where - nMoreLines Nothing cs = return cs - nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine - nMoreLines k cs = try $ (final k cs <|> rest k cs) - >>= uncurry nMoreLines - final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) - finalLine = try $ manyTill p end - minus1 k = k - 1 - oneOrMore cs = guard (not $ null cs) *> return cs - --- Org allows customization of the way it reads emphasis. We use the defaults --- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` --- for details). - --- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) -emphasisPreChars :: [Char] -emphasisPreChars = "\t \"'({" - --- | Chars allowed at after emphasis -emphasisPostChars :: [Char] -emphasisPostChars = "\t\n !\"'),-.:;?\\}" - --- | Chars not allowed at the (inner) border of emphasis -emphasisForbiddenBorderChars :: [Char] -emphasisForbiddenBorderChars = "\t\n\r \"'," - --- | The maximum number of newlines within -emphasisAllowedNewlines :: Int -emphasisAllowedNewlines = 1 - --- LaTeX-style math: see `org-latex-regexps` for details - --- | Chars allowed after an inline ($...$) math statement -mathPostChars :: [Char] -mathPostChars = "\t\n \"'),-.:;?" - --- | Chars not allowed at the (inner) border of math -mathForbiddenBorderChars :: [Char] -mathForbiddenBorderChars = "\t\n\r ,;.$" - --- | Maximum number of newlines in an inline math statement -mathAllowedNewlines :: Int -mathAllowedNewlines = 2 - --- | Whether we are right behind a char allowed before emphasis -afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool -afterEmphasisPreChar = do - pos <- getPosition - lastPrePos <- orgStateLastPreCharPos <$> getState - return . fromMaybe True $ (== pos) <$> lastPrePos - --- | Whether the parser is right after a forbidden border char -notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool -notAfterForbiddenBorderChar = do - pos <- getPosition - lastFBCPos <- orgStateLastForbiddenCharPos <$> getState - return $ lastFBCPos /= Just pos - --- | Read a sub- or superscript expression -subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) -subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") - , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString - ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] - -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do - state <- getState - guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum - ] - -inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) -inlineLaTeX = try $ do - cmd <- inlineLaTeXCommand - ils <- (lift . lift) $ parseAsInlineLaTeX cmd - maybe mzero returnF $ - parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils - where - parseAsMath :: String -> Maybe Inlines - parseAsMath cs = B.fromList <$> texMathToPandoc cs - - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) - parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - - parseAsMathMLSym :: String -> Maybe Inlines - parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 - - state :: ParserState - state = def{ stateOptions = def{ readerExtensions = - enableExtension Ext_raw_tex (readerExtensions def) } } - - texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline - -maybeRight :: Either a b -> Maybe b -maybeRight = either (const Nothing) Just - -inlineLaTeXCommand :: PandocMonad m => OrgParser m String -inlineLaTeXCommand = try $ do - rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest - case parsed of - Right (RawInline _ cs) -> do - -- drop any trailing whitespace, those are not be part of the command as - -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc - count len anyChar - return cmdNoSpc - _ -> mzero - --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - -exportSnippet :: PandocMonad m => OrgParser m (F Inlines) -exportSnippet = try $ do - string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") - returnF $ B.rawInline format snippet - -smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) - where - orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' - orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") - -singleQuoted :: PandocMonad m => OrgParser m (F Inlines) -singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - singleQuoteStart - updatePositions '\'' - withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline (singleQuoteEnd <* updatePositions '\'') - --- doubleQuoted will handle regular double-quoted sections, as well --- as dialogues with an open double-quote without a close double-quote --- in the same paragraph. -doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) -doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - doubleQuoteStart - updatePositions '"' - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs deleted file mode 100644 index 2f4e21248..000000000 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ /dev/null @@ -1,218 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} -{- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Meta - Copyright : Copyright (C) 2014-2017 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode meta declarations. --} -module Text.Pandoc.Readers.Org.Meta - ( metaExport - , metaKey - , metaLine - ) where - -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Blocks, Inlines ) -import Text.Pandoc.Class ( PandocMonad ) -import Text.Pandoc.Definition - -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.List ( intersperse ) -import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Network.HTTP ( urlEncode ) - --- | Returns the current meta, respecting export options. -metaExport :: Monad m => OrgParser m (F Meta) -metaExport = do - st <- getState - let settings = orgStateExportSettings st - return $ (if exportWithAuthor settings then id else removeMeta "author") - . (if exportWithCreator settings then id else removeMeta "creator") - . (if exportWithEmail settings then id else removeMeta "email") - <$> orgStateMeta st - -removeMeta :: String -> Meta -> Meta -removeMeta key meta' = - let metaMap = unMeta meta' - in Meta $ M.delete key metaMap - --- | Parse and handle a single line containing meta information --- The order, in which blocks are tried, makes sure that we're not looking at --- the beginning of a block, so we don't need to check for it -metaLine :: PandocMonad m => OrgParser m Blocks -metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) - -declarationLine :: PandocMonad m => OrgParser m () -declarationLine = try $ do - key <- map toLower <$> metaKey - (key', value) <- metaValue key - updateState $ \st -> - let meta' = B.setMeta key' <$> value <*> pure nullMeta - in st { orgStateMeta = meta' <> orgStateMeta st } - -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces - -metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) -metaValue key = - let inclKey = "header-includes" - in case key of - "author" -> (key,) <$> metaInlinesCommaSeparated - "title" -> (key,) <$> metaInlines - "date" -> (key,) <$> metaInlines - "header-includes" -> (key,) <$> accumulatingList key metaInlines - "latex_header" -> (inclKey,) <$> - accumulatingList inclKey (metaExportSnippet "latex") - "latex_class" -> ("documentclass",) <$> metaString - -- Org-mode expects class options to contain the surrounding brackets, - -- pandoc does not. - "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) - "html_head" -> (inclKey,) <$> - accumulatingList inclKey (metaExportSnippet "html") - _ -> (key,) <$> metaString - -metaInlines :: PandocMonad m => OrgParser m (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - -metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) -metaInlinesCommaSeparated = do - authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') - newline - authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs - let toMetaInlines = MetaInlines . B.toList - return $ MetaList . map toMetaInlines <$> sequence authors - -metaString :: Monad m => OrgParser m (F MetaValue) -metaString = metaModifiedString id - -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) -metaModifiedString f = return . MetaString . f <$> anyLine - --- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) -metaExportSnippet format = - return . MetaInlines . B.toList . B.rawInline format <$> anyLine - --- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String - -> OrgParser m (F MetaValue) - -> OrgParser m (F MetaValue) -accumulatingList key p = do - value <- p - meta' <- orgStateMeta <$> getState - return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value - where curList m = case lookupMeta key m of - Just (MetaList ms) -> ms - Just x -> [x] - _ -> [] - --- --- export options --- -optionLine :: Monad m => OrgParser m () -optionLine = try $ do - key <- metaKey - case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - "options" -> exportSettings - "todo" -> todoSequence >>= updateState . registerTodoSequence - "seq_todo" -> todoSequence >>= updateState . registerTodoSequence - "typ_todo" -> todoSequence >>= updateState . registerTodoSequence - _ -> mzero - -addLinkFormat :: Monad m => String - -> (String -> String) - -> OrgParser m () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - -parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) -parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces - linkSubst <- parseFormat - return (linkType, linkSubst) - --- | An ad-hoc, single-argument-only implementation of a printf-style format --- parser. -parseFormat :: Monad m => OrgParser m (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend - where - -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) - <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) - <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest - - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) - -inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline - --- --- ToDo Sequences and Keywords --- -todoSequence :: Monad m => OrgParser m TodoSequence -todoSequence = try $ do - todoKws <- todoKeywords - doneKws <- optionMaybe $ todoDoneSep *> todoKeywords - newline - -- There must be at least one DONE keyword. The last TODO keyword is taken if - -- necessary. - case doneKws of - Just done -> return $ keywordsToSequence todoKws done - Nothing -> case reverse todoKws of - [] -> mzero -- no keywords present - (x:xs) -> return $ keywordsToSequence (reverse xs) [x] - - where - todoKeywords :: Monad m => OrgParser m [String] - todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces - endOfKeywords = todoDoneSep <|> void newline - in manyTill keyword (lookAhead endOfKeywords) - - todoDoneSep :: Monad m => OrgParser m () - todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - - keywordsToSequence :: [String] -> [String] -> TodoSequence - keywordsToSequence todo done = - let todoMarkers = map (TodoMarker Todo) todo - doneMarkers = map (TodoMarker Done) done - in todoMarkers ++ doneMarkers diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs deleted file mode 100644 index 181dd1d5c..000000000 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ /dev/null @@ -1,259 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Define the Org-mode parser state. --} -module Text.Pandoc.Readers.Org.ParserState - ( OrgParserState (..) - , OrgParserLocal (..) - , OrgNoteRecord - , HasReaderOptions (..) - , HasQuoteContext (..) - , TodoMarker (..) - , TodoSequence - , TodoState (..) - , activeTodoMarkers - , registerTodoSequence - , F(..) - , askF - , asksF - , trimInlinesF - , runF - , returnF - , ExportSettings (..) - , ArchivedTreesOption (..) - , optionsToParserState - ) where - -import Control.Monad (liftM, liftM2) -import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local) - -import Data.Default (Default(..)) -import qualified Data.Map as M -import qualified Data.Set as Set - -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Text.Pandoc.Definition ( Meta(..), nullMeta ) -import Text.Pandoc.Options ( ReaderOptions(..) ) -import Text.Pandoc.Parsing ( HasHeaderMap(..) - , HasIdentifierList(..) - , HasLastStrPosition(..) - , HasQuoteContext(..) - , HasReaderOptions(..) - , ParserContext(..) - , QuoteContext(..) - , SourcePos ) - --- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) --- | Table of footnotes -type OrgNoteTable = [OrgNoteRecord] --- | Map of functions for link transformations. The map key is refers to the --- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) - --- | The states in which a todo item can be -data TodoState = Todo | Done - deriving (Eq, Ord, Show) - --- | A ToDo keyword like @TODO@ or @DONE@. -data TodoMarker = TodoMarker - { todoMarkerState :: TodoState - , todoMarkerName :: String - } - deriving (Show, Eq) - --- | Collection of todo markers in the order in which items should progress -type TodoSequence = [TodoMarker] - --- | Org-mode parser state -data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] - , orgStateEmphasisCharStack :: [Char] - , orgStateEmphasisNewlines :: Maybe Int - , orgStateExportSettings :: ExportSettings - , orgStateHeaderMap :: M.Map Inlines String - , orgStateIdentifiers :: Set.Set String - , orgStateLastForbiddenCharPos :: Maybe SourcePos - , orgStateLastPreCharPos :: Maybe SourcePos - , orgStateLastStrPos :: Maybe SourcePos - , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMeta :: F Meta - , orgStateNotes' :: OrgNoteTable - , orgStateOptions :: ReaderOptions - , orgStateParserContext :: ParserContext - , orgStateTodoSequences :: [TodoSequence] - } - -data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } - -instance Default OrgParserLocal where - def = OrgParserLocal NoQuote - -instance HasReaderOptions OrgParserState where - extractReaderOptions = orgStateOptions - -instance HasLastStrPosition OrgParserState where - getLastStrPos = orgStateLastStrPos - setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } - -instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where - getQuoteContext = asks orgLocalQuoteContext - withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) - -instance HasIdentifierList OrgParserState where - extractIdentifierList = orgStateIdentifiers - updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } - -instance HasHeaderMap OrgParserState where - extractHeaderMap = orgStateHeaderMap - updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } - -instance Default OrgParserState where - def = defaultOrgParserState - -defaultOrgParserState :: OrgParserState -defaultOrgParserState = OrgParserState - { orgStateAnchorIds = [] - , orgStateEmphasisCharStack = [] - , orgStateEmphasisNewlines = Nothing - , orgStateExportSettings = def - , orgStateHeaderMap = M.empty - , orgStateIdentifiers = Set.empty - , orgStateLastForbiddenCharPos = Nothing - , orgStateLastPreCharPos = Nothing - , orgStateLastStrPos = Nothing - , orgStateLinkFormatters = M.empty - , orgStateMeta = return nullMeta - , orgStateNotes' = [] - , orgStateOptions = def - , orgStateParserContext = NullState - , orgStateTodoSequences = [] - } - -optionsToParserState :: ReaderOptions -> OrgParserState -optionsToParserState opts = - def { orgStateOptions = opts } - -registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState -registerTodoSequence todoSeq st = - let curSeqs = orgStateTodoSequences st - in st{ orgStateTodoSequences = todoSeq : curSeqs } - --- | Get the current todo/done sequences. If no custom todo sequences have been --- defined, return a list containing just the default todo/done sequence. -activeTodoSequences :: OrgParserState -> [TodoSequence] -activeTodoSequences st = - let curSeqs = orgStateTodoSequences st - in if null curSeqs - then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]] - else curSeqs - -activeTodoMarkers :: OrgParserState -> TodoSequence -activeTodoMarkers = concat . activeTodoSequences - - --- --- Export Settings --- - --- | Options for the way archived trees are handled. -data ArchivedTreesOption = - ArchivedTreesExport -- ^ Export the complete tree - | ArchivedTreesNoExport -- ^ Exclude archived trees from exporting - | ArchivedTreesHeadlineOnly -- ^ Export only the headline, discard the contents - --- | Export settings <http://orgmode.org/manual/Export-settings.html> --- These settings can be changed via OPTIONS statements. -data ExportSettings = ExportSettings - { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] - -- ^ Specify drawer names which should be exported. @Left@ names are - -- explicitly excluded from the resulting output while @Right@ means that - -- only the listed drawer names should be included. - , exportEmphasizedText :: Bool -- ^ Parse emphasized text - , exportHeadlineLevels :: Int - -- ^ Maximum depth of headlines, deeper headlines are convert to list - , exportSmartQuotes :: Bool -- ^ Parse quotes smartly - , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly - , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts - , exportWithAuthor :: Bool -- ^ Include author in final meta-data - , exportWithCreator :: Bool -- ^ Include creator in final meta-data - , exportWithEmail :: Bool -- ^ Include email in final meta-data - , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers - } - -instance Default ExportSettings where - def = defaultExportSettings - -defaultExportSettings :: ExportSettings -defaultExportSettings = ExportSettings - { exportArchivedTrees = ArchivedTreesHeadlineOnly - , exportDrawers = Left ["LOGBOOK"] - , exportEmphasizedText = True - , exportHeadlineLevels = 3 - , exportSmartQuotes = True - , exportSpecialStrings = True - , exportSubSuperscripts = True - , exportWithAuthor = True - , exportWithCreator = True - , exportWithEmail = True - , exportWithTodoKeywords = True - } - - --- --- Parser state reader --- - --- | Reader monad wrapping the parser state. This is used to delay evaluation --- until all relevant information has been parsed and made available in the --- parser state. See also the newtype of the same name in --- Text.Pandoc.Parsing. -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Functor, Applicative, Monad) - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: Monad m => a -> m (F a) -returnF = return . return diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs deleted file mode 100644 index 1eb8a3b00..000000000 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ /dev/null @@ -1,217 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Org-mode parsing utilities. - -Most functions are simply re-exports from @Text.Pandoc.Parsing@, some -functions are adapted to Org-mode specific functionality. --} -module Text.Pandoc.Readers.Org.Parsing - ( OrgParser - , anyLine - , blanklines - , newline - , parseFromString - , skipSpaces1 - , inList - , withContext - , getExportSetting - , updateLastForbiddenCharPos - , updateLastPreCharPos - , orgArgKey - , orgArgWord - , orgArgWordChar - -- * Re-exports from Text.Pandoc.Parser - , ParserContext (..) - , many1Till - , notFollowedBy' - , spaceChar - , nonspaceChar - , skipSpaces - , blankline - , enclosed - , stringAnyCase - , charsInBalanced - , uri - , withRaw - , readWithM - , guardEnabled - , updateLastStrPos - , notAfterString - , ParserState (..) - , registerHeader - , QuoteContext (..) - , singleQuoteStart - , singleQuoteEnd - , doubleQuoteStart - , doubleQuoteEnd - , dash - , ellipses - , citeKey - -- * Re-exports from Text.Pandoc.Parsec - , runParser - , runParserT - , getInput - , char - , letter - , digit - , alphaNum - , skipMany1 - , spaces - , anyChar - , satisfy - , string - , count - , eof - , noneOf - , oneOf - , lookAhead - , notFollowedBy - , many - , many1 - , manyTill - , (<|>) - , (<?>) - , choice - , try - , sepBy - , sepBy1 - , sepEndBy1 - , option - , optional - , optionMaybe - , getState - , updateState - , SourcePos - , getPosition - ) where - -import Text.Pandoc.Readers.Org.ParserState - -import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline - , parseFromString ) - -import Control.Monad ( guard ) -import Control.Monad.Reader ( ReaderT ) - --- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) - --- --- Adaptions and specializations of parsing utilities --- - --- | Parse any line of text -anyLine :: Monad m => OrgParser m String -anyLine = - P.anyLine - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a -parseFromString parser str' = do - oldLastPreCharPos <- orgStateLastPreCharPos <$> getState - updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } - result <- P.parseFromString parser str' - updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } - return result - --- | Skip one or more tab or space characters. -skipSpaces1 :: Monad m => OrgParser m () -skipSpaces1 = skipMany1 spaceChar - --- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: Monad m => OrgParser m Char -newline = - P.newline - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] -blanklines = - P.blanklines - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Succeeds when we're in list context. -inList :: Monad m => OrgParser m () -inList = do - ctx <- orgStateParserContext <$> getState - guard (ctx == ListItemState) - --- | Parse in different context -withContext :: Monad m - => ParserContext -- ^ New parser context - -> OrgParser m a -- ^ Parser to run in that context - -> OrgParser m a -withContext context parser = do - oldContext <- orgStateParserContext <$> getState - updateState $ \s -> s{ orgStateParserContext = context } - result <- parser - updateState $ \s -> s{ orgStateParserContext = oldContext } - return result - --- --- Parser state functions --- - --- | Get an export setting. -getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a -getExportSetting s = s . orgStateExportSettings <$> getState - --- | Set the current position as the last position at which a forbidden char --- was found (i.e. a character which is not allowed at the inner border of --- markup). -updateLastForbiddenCharPos :: Monad m => OrgParser m () -updateLastForbiddenCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} - --- | Set the current parser position as the position at which a character was --- seen which allows inline markup to follow. -updateLastPreCharPos :: Monad m => OrgParser m () -updateLastPreCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastPreCharPos = Just p} - --- --- Org key-value parsing --- - --- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - --- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar - --- | Chars treated as part of a word in plists. -orgArgWordChar :: Monad m => OrgParser m Char -orgArgWordChar = alphaNum <|> oneOf "-_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs deleted file mode 100644 index 8c87cfa25..000000000 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ /dev/null @@ -1,97 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Utility functions used in other Pandoc Org modules. --} -module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString - , isImageFilename - , rundocBlockClass - , toRundocAttrib - , translateLang - ) where - -import Control.Arrow ( first ) -import Data.Char ( isAlphaNum ) -import Data.List ( isPrefixOf, isSuffixOf ) - - --- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols || - ':' `notElem` filename) - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] - --- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if --- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' - _ | isUrl s -> Just s -- URL - _ -> Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) - --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - --- | Prefix the name of a attribute, marking it as a code execution parameter. -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first (rundocPrefix ++) - --- | Translate from Org-mode's programming language identifiers to those used --- by Pandoc. This is useful to allow for proper syntax highlighting in --- Pandoc output. -translateLang :: String -> String -translateLang cs = - case cs of - "C" -> "c" - "C++" -> "cpp" - "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported - "js" -> "javascript" - "lisp" -> "commonlisp" - "R" -> "r" - "sh" -> "bash" - "sqlite" -> "sql" - _ -> cs diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs deleted file mode 100644 index 441c573d9..000000000 --- a/src/Text/Pandoc/Readers/RST.hs +++ /dev/null @@ -1,1354 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion from reStructuredText to 'Pandoc' document. --} -module Text.Pandoc.Readers.RST ( readRST ) where -import Text.Pandoc.Definition -import Text.Pandoc.Builder (setMeta, fromList) -import Text.Pandoc.Shared -import Text.Pandoc.Parsing -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Error -import Control.Monad ( when, liftM, guard, mzero ) -import Data.List ( findIndex, intercalate, isInfixOf, - transpose, sort, deleteFirstsBy, isSuffixOf , nub, union) -import Data.Maybe (fromMaybe, isJust) -import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import qualified Text.Pandoc.Builder as B -import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit, isSpace, toUpper) -import Data.Monoid ((<>)) -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, readFileFromDirs) - --- TODO: --- [ ] .. parsed-literal --- [ ] :widths: attribute in .. table --- [ ] .. csv-table --- [ ] .. list-table - --- | Parse reStructuredText string and return Pandoc document. -readRST :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readRST opts s = do - parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n") - case parsed of - Right result -> return result - Left e -> throwError e - -type RSTParser m = ParserT [Char] ParserState m - --- --- Constants and data structure definitions ---- - -bulletListMarkers :: [Char] -bulletListMarkers = "*+-" - -underlineChars :: [Char] -underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221" - --- --- parsing documents --- - -isHeader :: Int -> Block -> Bool -isHeader n (Header x _ _) = x == n -isHeader _ _ = False - --- | Promote all headers in a list of blocks. (Part of --- title transformation for RST.) -promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level attr text):rest) = - (Header (level - num) attr text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders _ [] = [] - --- | If list of blocks starts with a header (or a header and subheader) --- of level that are not found elsewhere, return it as a title and --- promote all the other headers. Also process a definition list right --- after the title block as metadata. -titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata - -> ([Block], Meta) -- ^ modified list of blocks, metadata -titleTransform (bs, meta) = - let (bs', meta') = - case bs of - ((Header 1 _ head1):(Header 2 _ head2):rest) - | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub - (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ - setMeta "subtitle" (fromList head2) meta) - ((Header 1 _ head1):rest) - | not (any (isHeader 1) rest) -> -- title only - (promoteHeaders 1 rest, - setMeta "title" (fromList head1) meta) - _ -> (bs, meta) - in case bs' of - (DefinitionList ds : rest) -> - (rest, metaFromDefList ds meta') - _ -> (bs', meta') - -metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta -metaFromDefList ds meta = adjustAuthors $ foldr f meta ds - where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) - adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" - $ M.adjust toPlain "date" - $ M.adjust toPlain "title" - $ M.mapKeys (\k -> if k == "authors" then "author" else k) - $ metamap - toPlain (MetaBlocks [Para xs]) = MetaInlines xs - toPlain x = x - splitAuthors (MetaBlocks [Para xs]) - = MetaList $ map MetaInlines - $ splitAuthors' xs - splitAuthors x = x - splitAuthors' = map normalizeSpaces . - splitOnSemi . concatMap factorSemi - splitOnSemi = splitBy (==Str ";") - factorSemi (Str []) = [] - factorSemi (Str s) = case break (==';') s of - (xs,[]) -> [Str xs] - (xs,';':ys) -> Str xs : Str ";" : - factorSemi (Str ys) - (xs,ys) -> Str xs : - factorSemi (Str ys) - factorSemi x = [x] - -parseRST :: PandocMonad m => RSTParser m Pandoc -parseRST = do - optional blanklines -- skip blank lines at beginning of file - startPos <- getPosition - -- go through once just to get list of reference keys and notes - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- concat <$> - manyTill (referenceKey <|> noteBlock <|> lineClump) eof - setInput docMinusKeys - setPosition startPos - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - -- now parse it for real... - blocks <- B.toList <$> parseBlocks - standalone <- getOption readerStandalone - state <- getState - let meta = stateMeta state - let (blocks', meta') = if standalone - then titleTransform (blocks, meta) - else (blocks, meta) - reportLogMessages - return $ Pandoc meta' blocks' - --- --- parsing blocks --- - -parseBlocks :: PandocMonad m => RSTParser m Blocks -parseBlocks = mconcat <$> manyTill block eof - -block :: PandocMonad m => RSTParser m Blocks -block = choice [ codeBlock - , blockQuote - , fieldList - , include - , directive - , comment - , header - , hrule - , lineBlock -- must go before definitionList - , table - , list - , lhsCodeBlock - , para - , mempty <$ blanklines - ] <?> "block" - --- --- field list --- - -rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) -rawFieldListItem minIndent = try $ do - indent <- length <$> many (char ' ') - guard $ indent >= minIndent - char ':' - name <- many1Till (noneOf "\n") (char ':') - (() <$ lookAhead newline) <|> skipMany1 spaceChar - first <- anyLine - rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) - indentedBlock - let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, raw) - -fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) -fieldListItem minIndent = try $ do - (name, raw) <- rawFieldListItem minIndent - term <- parseInlineFromString name - contents <- parseFromString parseBlocks raw - optional blanklines - return (term, [contents]) - -fieldList :: PandocMonad m => RSTParser m Blocks -fieldList = try $ do - indent <- length <$> lookAhead (many spaceChar) - items <- many1 $ fieldListItem indent - case items of - [] -> return mempty - items' -> return $ B.definitionList items' - --- --- line block --- - -lineBlock :: PandocMonad m => RSTParser m Blocks -lineBlock = try $ do - lines' <- lineBlockLines - lines'' <- mapM parseInlineFromString lines' - return $ B.lineBlock lines'' - -lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks -lineBlockDirective body = do - lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body - return $ B.lineBlock lines' - --- --- paragraph block --- - --- note: paragraph can end in a :: starting a code block -para :: PandocMonad m => RSTParser m Blocks -para = try $ do - result <- trimInlines . mconcat <$> many1 inline - option (B.plain result) $ try $ do - newline - blanklines - case viewr (B.unMany result) of - ys :> (Str xs) | "::" `isSuffixOf` xs -> do - raw <- option mempty codeBlockBody - return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) - <> raw - _ -> return (B.para result) - -plain :: PandocMonad m => RSTParser m Blocks -plain = B.plain . trimInlines . mconcat <$> many1 inline - --- --- header blocks --- - -header :: PandocMonad m => RSTParser m Blocks -header = doubleHeader <|> singleHeader <?> "header" - --- a header with lines on top and bottom -doubleHeader :: PandocMonad m => RSTParser m Blocks -doubleHeader = try $ do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- trimInlines . mconcat <$> many1 (notFollowedBy blankline >> inline) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - attr <- registerHeader nullAttr txt - return $ B.headerWith attr level txt - --- a header with line on the bottom only -singleHeader :: PandocMonad m => RSTParser m Blocks -singleHeader = try $ do - notFollowedBy' whitespace - txt <- trimInlines . mconcat <$> many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - attr <- registerHeader nullAttr txt - return $ B.headerWith attr level txt - --- --- hrule block --- - -hrule :: Monad m => ParserT [Char] st m Blocks -hrule = try $ do - chr <- oneOf underlineChars - count 3 (char chr) - skipMany (char chr) - blankline - blanklines - return B.horizontalRule - --- --- code blocks --- - --- read a line indented by a given string -indentedLine :: Monad m => String -> ParserT [Char] st m [Char] -indentedLine indents = try $ do - string indents - anyLine - --- one or more indented lines, possibly separated by blank lines. --- any amount of indentation will work. -indentedBlock :: Monad m => ParserT [Char] st m [Char] -indentedBlock = try $ do - indents <- lookAhead $ many1 spaceChar - lns <- many1 $ try $ do b <- option "" blanklines - l <- indentedLine indents - return (b ++ l) - optional blanklines - return $ unlines lns - -quotedBlock :: Monad m => ParserT [Char] st m [Char] -quotedBlock = try $ do - quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - lns <- many1 $ lookAhead (char quote) >> anyLine - optional blanklines - return $ unlines lns - -codeBlockStart :: Monad m => ParserT [Char] st m Char -codeBlockStart = string "::" >> blankline >> blankline - -codeBlock :: Monad m => ParserT [Char] st m Blocks -codeBlock = try $ codeBlockStart >> codeBlockBody - -codeBlockBody :: Monad m => ParserT [Char] st m Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> - (indentedBlock <|> quotedBlock) - -lhsCodeBlock :: Monad m => RSTParser m Blocks -lhsCodeBlock = try $ do - getPosition >>= guard . (==1) . sourceColumn - guardEnabled Ext_literate_haskell - optional codeBlockStart - lns <- latexCodeBlock <|> birdCodeBlock - blanklines - return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) - $ intercalate "\n" lns - -latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] -latexCodeBlock = try $ do - try (latexBlockLine "\\begin{code}") - many1Till anyLine (try $ latexBlockLine "\\end{code}") - where - latexBlockLine s = skipMany spaceChar >> string s >> blankline - -birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] -birdCodeBlock = filterSpace <$> many1 birdTrackLine - where filterSpace lns = - -- if (as is normal) there is always a space after >, drop it - if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns - -birdTrackLine :: Monad m => ParserT [Char] st m [Char] -birdTrackLine = char '>' >> anyLine - --- --- block quotes --- - -blockQuote :: PandocMonad m => RSTParser m Blocks -blockQuote = do - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return $ B.blockQuote contents - -{- -Unsupported options for include: -tab-width -encoding --} - -include :: PandocMonad m => RSTParser m Blocks -include = try $ do - string ".. include::" - skipMany spaceChar - f <- trim <$> anyLine - fields <- many $ rawFieldListItem 3 - -- options - let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead - let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead - guard $ not (null f) - oldPos <- getPosition - oldInput <- getInput - containers <- stateContainers <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos - updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] f - contentLines <- case mbContents of - Just s -> return $ lines s - Nothing -> do - logMessage $ CouldNotLoadIncludeFile f oldPos - return [] - let numLines = length contentLines - let startLine' = case startLine of - Nothing -> 1 - Just x | x >= 0 -> x - | otherwise -> numLines + x -- negative from end - let endLine' = case endLine of - Nothing -> numLines + 1 - Just x | x >= 0 -> x - | otherwise -> numLines + x -- negative from end - let contentLines' = drop (startLine' - 1) - $ take (endLine' - 1) - $ contentLines - let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `isInfixOf`)) - Nothing -> id) . - (case trim <$> lookup "start-after" fields of - Just patt -> drop 1 . - dropWhile (not . (patt `isInfixOf`)) - Nothing -> id) $ contentLines' - let contents' = unlines contentLines'' - case lookup "code" fields of - Just lang -> do - let numberLines = lookup "number-lines" fields - let classes = trimr lang : ["numberLines" | isJust numberLines] ++ - maybe [] words (lookup "class" fields) - let kvs = maybe [] (\n -> [("startFrom", trimr n)]) numberLines - let ident = maybe "" trimr $ lookup "name" fields - let attribs = (ident, classes, kvs) - return $ B.codeBlockWith attribs contents' - Nothing -> case lookup "literal" fields of - Just _ -> return $ B.rawBlock "rst" contents' - Nothing -> do - setPosition $ newPos f 1 1 - setInput contents' - bs <- optional blanklines >> - (mconcat <$> many block) - setInput oldInput - setPosition oldPos - updateState $ \s -> s{ stateContainers = - tail $ stateContainers s } - return bs - - --- --- list blocks --- - -list :: PandocMonad m => RSTParser m Blocks -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -definitionListItem :: PandocMonad m => RSTParser m (Inlines, [Blocks]) -definitionListItem = try $ do - -- avoid capturing a directive or comment - notFollowedBy (try $ char '.' >> char '.') - term <- trimInlines . mconcat <$> many1Till inline endline - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n" - return (term, [contents]) - -definitionList :: PandocMonad m => RSTParser m Blocks -definitionList = B.definitionList <$> many1 definitionListItem - --- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT [Char] st m Int -bulletListStart = try $ do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - return $ length (marker:white) - --- parses ordered list start and returns its length (inc following whitespace) -orderedListStart :: Monad m => ListNumberStyle - -> ListNumberDelim - -> RSTParser m Int -orderedListStart style delim = try $ do - (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar - return $ markerLen + length white - --- parse a line of a list item -listLine :: Monad m => Int -> RSTParser m [Char] -listLine markerLength = try $ do - notFollowedBy blankline - indentWith markerLength - line <- anyLine - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Monad m => Int -> RSTParser m [Char] -indentWith num = do - tabStop <- getOption readerTabStop - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: Monad m => RSTParser m Int - -> RSTParser m (Int, [Char]) -rawListItem start = try $ do - markerLength <- start - firstLine <- anyLine - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) - --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int -> RSTParser m [Char] -listContinuation markerLength = try $ do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return $ blanks ++ concat result - -listItem :: PandocMonad m - => RSTParser m Int - -> RSTParser m Blocks -listItem start = try $ do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - skipMany1 blankline <|> () <$ lookAhead start - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ "\n" - updateState (\st -> st {stateParserContext = oldContext}) - return $ case B.toList parsed of - [Para xs] -> B.singleton $ Plain xs - [Para xs, BulletList ys] -> B.fromList [Plain xs, BulletList ys] - [Para xs, OrderedList s ys] -> B.fromList [Plain xs, OrderedList s ys] - [Para xs, DefinitionList ys] -> B.fromList [Plain xs, DefinitionList ys] - _ -> parsed - -orderedList :: PandocMonad m => RSTParser m Blocks -orderedList = try $ do - (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar) - items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return $ B.orderedListWith (start, style, delim) items' - -bulletList :: PandocMonad m => RSTParser m Blocks -bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart) - --- --- directive (e.g. comment, container, compound-paragraph) --- - -comment :: Monad m => RSTParser m Blocks -comment = try $ do - string ".." - skipMany1 spaceChar <|> (() <$ lookAhead newline) - notFollowedBy' directiveLabel - manyTill anyChar blanklines - optional indentedBlock - return mempty - -directiveLabel :: Monad m => RSTParser m String -directiveLabel = map toLower - <$> many1Till (letter <|> char '-') (try $ string "::") - -directive :: PandocMonad m => RSTParser m Blocks -directive = try $ do - string ".." - directive' - -directive' :: PandocMonad m => RSTParser m Blocks -directive' = do - skipMany1 spaceChar - label <- directiveLabel - skipMany spaceChar - top <- many $ satisfy (/='\n') - <|> try (char '\n' <* - notFollowedBy' (rawFieldListItem 3) <* - count 3 (char ' ') <* - notFollowedBy blankline) - newline - fields <- many $ rawFieldListItem 3 - body <- option "" $ try $ blanklines >> indentedBlock - optional blanklines - let body' = body ++ "\n\n" - imgAttr cl = ("", classes, getAtt "width" ++ getAtt "height") - where - classes = words $ maybe "" trim $ lookup cl fields - getAtt k = case lookup k fields of - Just v -> [(k, filter (not . isSpace) v)] - Nothing -> [] - case label of - "table" -> tableDirective top fields body' - "line-block" -> lineBlockDirective body' - "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields - "container" -> parseFromString parseBlocks body' - "replace" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim top) - "unicode" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim $ unicodeTransform top) - "compound" -> parseFromString parseBlocks body' - "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' - "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' - "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' - "rubric" -> B.para . B.strong <$> parseInlineFromString top - _ | label `elem` ["attention","caution","danger","error","hint", - "important","note","tip","warning","admonition"] -> - do bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' - let lab = case label of - "admonition" -> mempty - (l:ls) -> B.divWith ("",["admonition-title"],[]) - (B.para (B.str (toUpper l : ls))) - [] -> mempty - return $ B.divWith ("",[label],[]) (lab <> bod) - "sidebar" -> - do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseInlineFromString - (trim top ++ if null subtit - then "" - else (": " ++ subtit)) - bod <- parseFromString parseBlocks body' - return $ B.divWith ("",["sidebar"],[]) $ tit <> bod - "topic" -> - do tit <- B.para . B.strong <$> parseInlineFromString top - bod <- parseFromString parseBlocks body' - return $ B.divWith ("",["topic"],[]) $ tit <> bod - "default-role" -> mempty <$ updateState (\s -> - s { stateRstDefaultRole = - case trim top of - "" -> stateRstDefaultRole def - role -> role }) - x | x == "code" || x == "code-block" -> - codeblock (words $ fromMaybe [] $ lookup "class" fields) - (lookup "number-lines" fields) (trim top) body - "aafig" -> do - let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) - return $ B.codeBlockWith attribs $ stripTrailingNewlines body - "math" -> return $ B.para $ mconcat $ map B.displayMath - $ toChunks $ top ++ "\n\n" ++ body - "figure" -> do - (caption, legend) <- parseFromString extractCaption body' - let src = escapeURI $ trim top - return $ B.para (B.imageWith (imgAttr "figclass") src "fig:" caption) <> legend - "image" -> do - let src = escapeURI $ trim top - let alt = B.str $ maybe "image" trim $ lookup "alt" fields - let attr = imgAttr "class" - return $ B.para - $ case lookup "target" fields of - Just t -> B.link (escapeURI $ trim t) "" - $ B.imageWith attr src "" alt - Nothing -> B.imageWith attr src "" alt - "class" -> do - let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) - -- directive content or the first immediately following element - children <- case body of - "" -> block - _ -> parseFromString parseBlocks body' - return $ B.divWith attrs children - other -> do - pos <- getPosition - logMessage $ SkippedContent (".. " ++ other) pos - return mempty - -tableDirective :: PandocMonad m - => String -> [(String, String)] -> String -> RSTParser m Blocks -tableDirective top _fields body = do - bs <- parseFromString parseBlocks body - case B.toList bs of - [Table _ aligns' widths' header' rows'] -> do - title <- parseFromString (trimInlines . mconcat <$> many inline) top - -- TODO widths - -- align is not applicable since we can't represent whole table align - return $ B.singleton $ Table (B.toList title) - aligns' widths' header' rows' - _ -> return mempty - --- TODO: --- - Only supports :format: fields with a single format for :raw: roles, --- change Text.Pandoc.Definition.Format to fix -addNewRole :: PandocMonad m => String -> [(String, String)] -> RSTParser m Blocks -addNewRole roleString fields = do - pos <- getPosition - (role, parentRole) <- parseFromString inheritedRole roleString - customRoles <- stateRstCustomRoles <$> getState - let getBaseRole (r, f, a) roles = - case M.lookup r roles of - Just (r', f', a') -> getBaseRole (r', f', a') roles - Nothing -> (r, f, a) - (baseRole, baseFmt, baseAttr) = - getBaseRole (parentRole, Nothing, nullAttr) customRoles - fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [String] -> [String] - annotate = maybe id (:) $ - if baseRole == "code" - then lookup "language" fields - else Nothing - attr = let (ident, classes, keyValues) = baseAttr - -- nub in case role name & language class are the same - in (ident, nub . (role :) . annotate $ classes, keyValues) - - -- warn about syntax we ignore - flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (baseRole /= "code") $ logMessage $ - SkippedContent ":language: [because parent of role is not :code:]" - pos - "format" -> when (baseRole /= "raw") $ logMessage $ - SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos - when (parentRole == "raw" && countKeys "format" > 1) $ - logMessage $ SkippedContent ":format: [after first in definition of role]" - pos - when (parentRole == "code" && countKeys "language" > 1) $ - logMessage $ SkippedContent - ":language: [after first in definition of role]" pos - - updateState $ \s -> s { - stateRstCustomRoles = - M.insert role (baseRole, fmt, attr) customRoles - } - - return mempty - where - countKeys k = length . filter (== k) . map fst $ fields - inheritedRole = - (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span") - - --- Can contain character codes as decimal numbers or --- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u --- or as XML-style hexadecimal character entities, e.g. ᨫ --- or text, which is used as-is. Comments start with .. -unicodeTransform :: String -> String -unicodeTransform t = - case t of - ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment - ('0':'x':xs) -> go "0x" xs - ('x':xs) -> go "x" xs - ('\\':'x':xs) -> go "\\x" xs - ('U':'+':xs) -> go "U+" xs - ('u':xs) -> go "u" xs - ('\\':'u':xs) -> go "\\u" xs - ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs) - -- drop semicolon - (\(c,s) -> c : unicodeTransform (drop 1 s)) - $ extractUnicodeChar xs - (x:xs) -> x : unicodeTransform xs - [] -> [] - where go pref zs = maybe (pref ++ unicodeTransform zs) - (\(c,s) -> c : unicodeTransform s) - $ extractUnicodeChar zs - -extractUnicodeChar :: String -> Maybe (Char, String) -extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc - where (ds,rest) = span isHexDigit s - mbc = safeRead ('\'':'\\':'x':ds ++ "'") - -extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) -extractCaption = do - capt <- trimInlines . mconcat <$> many inline - legend <- optional blanklines >> (mconcat <$> many block) - return (capt,legend) - --- divide string by blanklines -toChunks :: String -> [String] -toChunks = dropWhile null - . map (trim . unlines) - . splitBy (all (`elem` (" \t" :: String))) . lines - -codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks -codeblock classes numberLines lang body = - return $ B.codeBlockWith attribs $ stripTrailingNewlines body - where attribs = ("", classes', kvs) - classes' = "sourceCode" : lang - : maybe [] (\_ -> ["numberLines"]) numberLines - ++ classes - kvs = case numberLines of - Just "" -> [] - Nothing -> [] - Just n -> [("startFrom",trim n)] - ---- ---- note block ---- - -noteBlock :: Monad m => RSTParser m [Char] -noteBlock = try $ do - startPos <- getPosition - string ".." - spaceChar >> skipMany spaceChar - ref <- noteMarker - first <- (spaceChar >> skipMany spaceChar >> anyLine) - <|> (newline >> return "") - blanks <- option "" blanklines - rest <- option "" indentedBlock - endPos <- getPosition - let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let newnote = (ref, raw) - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -noteMarker :: Monad m => RSTParser m [Char] -noteMarker = do - char '[' - res <- many1 digit - <|> (try $ char '#' >> liftM ('#':) simpleReferenceName') - <|> count 1 (oneOf "#*") - char ']' - return res - --- --- reference key --- - -quotedReferenceName :: PandocMonad m => RSTParser m Inlines -quotedReferenceName = try $ do - char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- trimInlines . mconcat <$> many1Till inline (char '`') - return label' - -unquotedReferenceName :: PandocMonad m => RSTParser m Inlines -unquotedReferenceName = try $ do - label' <- trimInlines . mconcat <$> many1Till inline (lookAhead $ char ':') - return label' - --- Simple reference names are single words consisting of alphanumerics --- plus isolated (no two adjacent) internal hyphens, underscores, --- periods, colons and plus signs; no whitespace or other characters --- are allowed. -simpleReferenceName' :: Monad m => ParserT [Char] st m String -simpleReferenceName' = do - x <- alphaNum - xs <- many $ alphaNum - <|> (try $ oneOf "-_:+." <* lookAhead alphaNum) - return (x:xs) - -simpleReferenceName :: Monad m => ParserT [Char] st m Inlines -simpleReferenceName = do - raw <- simpleReferenceName' - return $ B.str raw - -referenceName :: PandocMonad m => RSTParser m Inlines -referenceName = quotedReferenceName <|> - (try $ simpleReferenceName <* lookAhead (char ':')) <|> - unquotedReferenceName - -referenceKey :: PandocMonad m => RSTParser m [Char] -referenceKey = do - startPos <- getPosition - choice [substKey, anonymousKey, regularKey] - optional blanklines - endPos <- getPosition - -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -targetURI :: Monad m => ParserT [Char] st m [Char] -targetURI = do - skipSpaces - optional newline - contents <- many1 (try (many spaceChar >> newline >> - many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") - blanklines - return $ escapeURI $ trim $ contents - -substKey :: PandocMonad m => RSTParser m () -substKey = try $ do - string ".." - skipMany1 spaceChar - (alt,ref) <- withRaw $ trimInlines . mconcat - <$> enclosed (char '|') (char '|') inline - res <- B.toList <$> directive' - il <- case res of - -- use alt unless :alt: attribute on image: - [Para [Image attr [Str "image"] (src,tit)]] -> - return $ B.imageWith attr src tit alt - [Para [Link _ [Image attr [Str "image"] (src,tit)] (src',tit')]] -> - return $ B.link src' tit' (B.imageWith attr src tit alt) - [Para ils] -> return $ B.fromList ils - _ -> mzero - let key = toKey $ stripFirstAndLast ref - updateState $ \s -> s{ stateSubstitutions = M.insert key il $ stateSubstitutions s } - -anonymousKey :: Monad m => RSTParser m () -anonymousKey = try $ do - oneOfStrings [".. __:", "__"] - src <- targetURI - pos <- getPosition - let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } - -stripTicks :: String -> String -stripTicks = reverse . stripTick . reverse . stripTick - where stripTick ('`':xs) = xs - stripTick xs = xs - -regularKey :: PandocMonad m => RSTParser m () -regularKey = try $ do - string ".. _" - (_,ref) <- withRaw referenceName - char ':' - src <- targetURI - let key = toKey $ stripTicks ref - --TODO: parse width, height, class and name attributes - updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } - --- --- tables --- - --- General tables TODO: --- - figure out if leading spaces are acceptable and if so, add --- support for them --- --- Simple tables TODO: --- - column spans --- - multiline support --- - ensure that rightmost column span does not need to reach end --- - require at least 2 columns --- --- Grid tables TODO: --- - column spans - -dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many (char ' ') - return (length dashes, length $ dashes ++ sp) - -simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] -simpleDashedLines ch = try $ many1 (dashedLine ch) - --- Parse a table row separator -simpleTableSep :: Monad m => Char -> RSTParser m Char -simpleTableSep ch = try $ simpleDashedLines ch >> newline - --- Parse a table footer -simpleTableFooter :: Monad m => RSTParser m [Char] -simpleTableFooter = try $ simpleTableSep '=' >> blanklines - --- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] -simpleTableRawLine indices = do - line <- many1Till anyChar newline - return (simpleTableSplitLine indices line) - --- Parse a table row and return a list of blocks (columns). -simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks] -simpleTableRow indices = do - notFollowedBy' simpleTableFooter - firstLine <- simpleTableRawLine indices - colLines <- return [] -- TODO - let cols = map unlines . transpose $ firstLine : colLines - mapM (parseFromString (mconcat <$> many plain)) cols - -simpleTableSplitLine :: [Int] -> String -> [String] -simpleTableSplitLine indices line = - map trim - $ tail $ splitByIndices (init indices) line - -simpleTableHeader :: PandocMonad m - => Bool -- ^ Headerless table - -> RSTParser m ([Blocks], [Alignment], [Int]) -simpleTableHeader headless = try $ do - optional blanklines - rawContent <- if headless - then return "" - else simpleTableSep '=' >> anyLine - dashes <- simpleDashedLines '=' <|> simpleDashedLines '-' - newline - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault - let rawHeads = if headless - then replicate (length dashes) "" - else simpleTableSplitLine indices rawContent - heads <- mapM (parseFromString (mconcat <$> many plain)) $ - map trim rawHeads - return (heads, aligns, indices) - --- Parse a simple table. -simpleTable :: PandocMonad m - => Bool -- ^ Headerless table - -> RSTParser m Blocks -simpleTable headless = do - tbl <- tableWith (simpleTableHeader headless) simpleTableRow - sep simpleTableFooter - -- Simple tables get 0s for relative column widths (i.e., use default) - case B.toList tbl of - [Table c a _w h l] -> return $ B.singleton $ - Table c a (replicate (length a) 0) h l - _ -> - throwError $ PandocShouldNeverHappenError - "tableWith returned something unexpected" - where - sep = return () -- optional (simpleTableSep '-') - -gridTable :: PandocMonad m - => Bool -- ^ Headerless table - -> RSTParser m Blocks -gridTable headerless = gridTableWith parseBlocks headerless - -table :: PandocMonad m => RSTParser m Blocks -table = gridTable False <|> simpleTable False <|> - gridTable True <|> simpleTable True <?> "table" - --- --- inline --- - -inline :: PandocMonad m => RSTParser m Inlines -inline = choice [ note -- can start with whitespace, so try before ws - , whitespace - , link - , str - , endline - , strong - , emph - , code - , subst - , interpretedRole - , smart - , hyphens - , escapedChar - , symbol ] <?> "inline" - -parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) - -hyphens :: Monad m => RSTParser m Inlines -hyphens = do - result <- many1 (char '-') - optional endline - -- don't want to treat endline after hyphen or dash as a space - return $ B.str result - -escapedChar :: Monad m => ParserT [Char] st m Inlines -escapedChar = do c <- escaped anyChar - return $ if c == ' ' -- '\ ' is null in RST - then mempty - else B.str [c] - -symbol :: Monad m => RSTParser m Inlines -symbol = do - result <- oneOf specialChars - return $ B.str [result] - --- parses inline code, between codeStart and codeEnd -code :: Monad m => RSTParser m Inlines -code = try $ do - string "``" - result <- manyTill anyChar (try (string "``")) - return $ B.code - $ trim $ unwords $ lines result - --- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: Monad m => RSTParser m a -> RSTParser m a -atStart p = do - pos <- getPosition - st <- getState - -- single quote start can't be right after str - guard $ stateLastStrPos st /= Just pos - p - -emph :: PandocMonad m => RSTParser m Inlines -emph = B.emph . trimInlines . mconcat <$> - enclosed (atStart $ char '*') (char '*') inline - -strong :: PandocMonad m => RSTParser m Inlines -strong = B.strong . trimInlines . mconcat <$> - enclosed (atStart $ string "**") (try $ string "**") inline - --- Note, this doesn't precisely implement the complex rule in --- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules --- but it should be good enough for most purposes --- --- TODO: --- - Classes are silently discarded in addNewRole --- - Lacks sensible implementation for title-reference (which is the default) --- - Allows direct use of the :raw: role, rST only allows inherited use. -interpretedRole :: PandocMonad m => RSTParser m Inlines -interpretedRole = try $ do - (role, contents) <- roleBefore <|> roleAfter - renderRole contents Nothing role nullAttr - -renderRole :: PandocMonad m => String -> Maybe String -> String -> Attr -> RSTParser m Inlines -renderRole contents fmt role attr = case role of - "sup" -> return $ B.superscript $ B.str contents - "superscript" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "subscript" -> return $ B.subscript $ B.str contents - "emphasis" -> return $ B.emph $ B.str contents - "strong" -> return $ B.strong $ B.str contents - "rfc-reference" -> return $ rfcLink contents - "RFC" -> return $ rfcLink contents - "pep-reference" -> return $ pepLink contents - "PEP" -> return $ pepLink contents - "literal" -> return $ B.codeWith attr contents - "math" -> return $ B.math contents - "title-reference" -> titleRef contents - "title" -> titleRef contents - "t" -> titleRef contents - "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents - "span" -> return $ B.spanWith attr $ B.str contents - "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents - custom -> do - customRoles <- stateRstCustomRoles <$> getState - case M.lookup custom customRoles of - Just (newRole, newFmt, newAttr) -> - renderRole contents newFmt newRole newAttr - Nothing -> do - pos <- getPosition - logMessage $ SkippedContent (":" ++ custom ++ ":") pos - return $ B.str contents -- Undefined role - where - titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour - rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) - where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" - pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) - where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" - -addClass :: String -> Attr -> Attr -addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues) - -roleName :: PandocMonad m => RSTParser m String -roleName = many1 (letter <|> char '-') - -roleMarker :: PandocMonad m => RSTParser m String -roleMarker = char ':' *> roleName <* char ':' - -roleBefore :: PandocMonad m => RSTParser m (String,String) -roleBefore = try $ do - role <- roleMarker - contents <- unmarkedInterpretedText - return (role,contents) - -roleAfter :: PandocMonad m => RSTParser m (String,String) -roleAfter = try $ do - contents <- unmarkedInterpretedText - role <- roleMarker <|> (stateRstDefaultRole <$> getState) - return (role,contents) - -unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] -unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar - -whitespace :: PandocMonad m => RSTParser m Inlines -whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" - -str :: Monad m => RSTParser m Inlines -str = do - let strChar = noneOf ("\t\n " ++ specialChars) - result <- many1 strChar - updateLastStrPos - return $ B.str result - --- an endline character that can be treated as a space, not a structural break -endline :: Monad m => RSTParser m Inlines -endline = try $ do - newline - notFollowedBy blankline - -- parse potential list-starts at beginning of line differently in a list: - st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> - notFollowedBy' bulletListStart - else return () - return B.softbreak - --- --- links --- - -link :: PandocMonad m => RSTParser m Inlines -link = choice [explicitLink, referenceLink, autoLink] <?> "link" - -explicitLink :: PandocMonad m => RSTParser m Inlines -explicitLink = try $ do - char '`' - notFollowedBy (char '`') -- `` marks start of inline code - label' <- trimInlines . mconcat <$> - manyTill (notFollowedBy (char '`') >> inline) (char '<') - src <- trim <$> manyTill (noneOf ">\n") (char '>') - skipSpaces - string "`_" - optional $ char '_' -- anonymous form - let label'' = if label' == mempty - then B.str src - else label' - -- `link <google_>` is a reference link to _google! - ((src',tit),attr) <- case reverse src of - '_':xs -> lookupKey [] (toKey (reverse xs)) - _ -> return ((src, ""), nullAttr) - return $ B.linkWith attr (escapeURI src') tit label'' - -referenceLink :: PandocMonad m => RSTParser m Inlines -referenceLink = try $ do - (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <* - char '_' - let isAnonKey (Key ('_':_)) = True - isAnonKey _ = False - state <- getState - let keyTable = stateKeys state - key <- option (toKey $ stripTicks ref) $ - do char '_' - let anonKeys = sort $ filter isAnonKey $ M.keys keyTable - case anonKeys of - [] -> mzero - (k:_) -> return k - ((src,tit), attr) <- lookupKey [] key - -- if anonymous link, remove key so it won't be used again - when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ B.linkWith attr src tit label' - --- We keep a list of oldkeys so we can detect lookup loops. -lookupKey :: PandocMonad m - => [Key] -> Key -> RSTParser m ((String, String), Attr) -lookupKey oldkeys key = do - pos <- getPosition - state <- getState - let keyTable = stateKeys state - case M.lookup key keyTable of - Nothing -> do - let Key key' = key - logMessage $ ReferenceNotFound key' pos - return (("",""),nullAttr) - -- check for keys of the form link_, which need to be resolved: - Just ((u@(_:_),""),_) | last u == '_' -> do - let rawkey = init u - let newkey = toKey rawkey - if newkey `elem` oldkeys - then do - logMessage $ CircularReference rawkey pos - return (("",""),nullAttr) - else lookupKey (key:oldkeys) newkey - Just val -> return val - -autoURI :: Monad m => RSTParser m Inlines -autoURI = do - (orig, src) <- uri - return $ B.link src "" $ B.str orig - -autoEmail :: Monad m => RSTParser m Inlines -autoEmail = do - (orig, src) <- emailAddress - return $ B.link src "" $ B.str orig - -autoLink :: PandocMonad m => RSTParser m Inlines -autoLink = autoURI <|> autoEmail - -subst :: PandocMonad m => RSTParser m Inlines -subst = try $ do - (_,ref) <- withRaw $ enclosed (char '|') (char '|') inline - state <- getState - let substTable = stateSubstitutions state - let key = toKey $ stripFirstAndLast ref - case M.lookup key substTable of - Nothing -> do - pos <- getPosition - logMessage $ ReferenceNotFound (show key) pos - return mempty - Just target -> return target - -note :: PandocMonad m => RSTParser m Inlines -note = try $ do - optional whitespace - ref <- noteMarker - char '_' - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> do - pos <- getPosition - logMessage $ ReferenceNotFound ref pos - return mempty - Just raw -> do - -- We temporarily empty the note list while parsing the note, - -- so that we don't get infinite loops with notes inside notes... - -- Note references inside other notes are allowed in reST, but - -- not yet in this implementation. - updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - let newnotes = if (ref == "*" || ref == "#") -- auto-numbered - -- delete the note so the next auto-numbered note - -- doesn't get the same contents: - then deleteFirstsBy (==) notes [(ref,raw)] - else notes - updateState $ \st -> st{ stateNotes = newnotes } - return $ B.note contents - -smart :: PandocMonad m => RSTParser m Inlines -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice [apostrophe, dash, ellipses] - -singleQuoted :: PandocMonad m => RSTParser m Inlines -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ - B.singleQuoted . trimInlines . mconcat <$> - many1Till inline singleQuoteEnd - -doubleQuoted :: PandocMonad m => RSTParser m Inlines -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ - B.doubleQuoted . trimInlines . mconcat <$> - many1Till inline doubleQuoteEnd diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs deleted file mode 100644 index 3b89f2ee9..000000000 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ /dev/null @@ -1,525 +0,0 @@ -{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-} --- RelaxedPolyRec needed for inlinesBetween on GHC < 7 -{- - Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.TWiki - Copyright : Copyright (C) 2014 Alexander Sulfrian - License : GNU GPL, version 2 or above - - Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de> - Stability : alpha - Portability : portable - -Conversion of twiki text to 'Pandoc' document. --} -module Text.Pandoc.Readers.TWiki ( readTWiki - ) where - -import Text.Pandoc.Definition -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Options -import Text.Pandoc.Logging -import Text.Pandoc.Parsing hiding (enclosed, macro, nested) -import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Control.Monad -import Text.Pandoc.XML (fromEntities) -import Data.Maybe (fromMaybe) -import Text.HTML.TagSoup -import Data.Char (isAlphaNum) -import qualified Data.Foldable as F -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, report) - --- | Read twiki from an input string and return a Pandoc document. -readTWiki :: PandocMonad m - => ReaderOptions - -> String - -> m Pandoc -readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n") - case res of - Left e -> throwError e - Right d -> return d - -type TWParser = ParserT [Char] ParserState - --- --- utility functions --- - -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg - -skip :: TWParser m a -> TWParser m () -skip parser = parser >> return () - -nested :: PandocMonad m => TWParser m a -> TWParser m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) -htmlElement tag = tryMsg tag $ do - (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) - return (htmlAttrToPandoc attr, trim content) - where - endtag = skip $ htmlTag (~== TagClose tag) - endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse - -htmlAttrToPandoc :: [Attribute String] -> Attr -htmlAttrToPandoc attrs = (ident, classes, keyvals) - where - ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs - keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - -parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) -parseHtmlContentWithAttrs tag parser = do - (attr, content) <- htmlElement tag - parsedContent <- try $ parseContent content - return (attr, parsedContent) - where - parseContent = parseFromString $ nested $ manyTill parser endOfContent - endOfContent = try $ skipMany blankline >> skipSpaces >> eof - -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] -parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd - --- --- main parser --- - -parseTWiki :: PandocMonad m => TWParser m Pandoc -parseTWiki = do - bs <- mconcat <$> many block - spaces - eof - return $ B.doc bs - - --- --- block parsers --- - -block :: PandocMonad m => TWParser m B.Blocks -block = do - pos <- getPosition - res <- mempty <$ skipMany1 blankline - <|> blockElements - <|> para - skipMany blankline - report $ ParsingTrace (take 60 $ show $ B.toList res) pos - return res - -blockElements :: PandocMonad m => TWParser m B.Blocks -blockElements = choice [ separator - , header - , verbatim - , literal - , list "" - , table - , blockQuote - , noautolink - ] - -separator :: PandocMonad m => TWParser m B.Blocks -separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule - -header :: PandocMonad m => TWParser m B.Blocks -header = tryMsg "header" $ do - string "---" - level <- many1 (char '+') >>= return . length - guard $ level <= 6 - classes <- option [] $ string "!!" >> return ["unnumbered"] - skipSpaces - content <- B.trimInlines . mconcat <$> manyTill inline newline - attr <- registerHeader ("", classes, []) content - return $ B.headerWith attr level $ content - -verbatim :: PandocMonad m => TWParser m B.Blocks -verbatim = (htmlElement "verbatim" <|> htmlElement "pre") - >>= return . (uncurry B.codeBlockWith) - -literal :: PandocMonad m => TWParser m B.Blocks -literal = htmlElement "literal" >>= return . rawBlock - where - format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs - rawBlock (attrs, content) = B.rawBlock (format attrs) content - -list :: PandocMonad m => String -> TWParser m B.Blocks -list prefix = choice [ bulletList prefix - , orderedList prefix - , definitionList prefix] - -definitionList :: PandocMonad m => String -> TWParser m B.Blocks -definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) - return $ B.definitionList elements - where - parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) - parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces - term <- many1Till inline $ string ": " - line <- listItemLine indent $ string "$ " - return $ (mconcat term, [line]) - -bulletList :: PandocMonad m => String -> TWParser m B.Blocks -bulletList prefix = tryMsg "bulletList" $ - parseList prefix (char '*') (char ' ') - -orderedList :: PandocMonad m => String -> TWParser m B.Blocks -orderedList prefix = tryMsg "orderedList" $ - parseList prefix (oneOf "1iIaA") (string ". ") - -parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks -parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) - return $ case style of - '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks - 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks - 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks - 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks - 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks - _ -> B.bulletList blocks - where - listStyle = do - indent <- many1 $ string " " - style <- marker - return (concat indent, style) - -parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker - -listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat - where - lineContent = do - content <- anyLine - continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation) - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> - string " " >> lineContent - parseContent = parseFromString $ many1 $ nestedList <|> parseInline - parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>= - return . B.plain . mconcat - nestedList = list prefix - lastNewline = try $ char '\n' <* eof - newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList - -table :: PandocMonad m => TWParser m B.Blocks -table = try $ do - tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip - rows <- many1 tableParseRow - return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead - where - buildTable caption rows (aligns, heads) - = B.table caption aligns heads rows - align rows = replicate (columCount rows) (AlignDefault, 0) - columns rows = replicate (columCount rows) mempty - columCount rows = length $ head rows - -tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks) -tableParseHeader = try $ do - char '|' - leftSpaces <- many spaceChar >>= return . length - char '*' - content <- tableColumnContent (char '*' >> skipSpaces >> char '|') - char '*' - rightSpaces <- many spaceChar >>= return . length - optional tableEndOfRow - return (tableAlign leftSpaces rightSpaces, content) - where - tableAlign left right - | left >= 2 && left == right = (AlignCenter, 0) - | left > right = (AlignRight, 0) - | otherwise = (AlignLeft, 0) - -tableParseRow :: PandocMonad m => TWParser m [B.Blocks] -tableParseRow = many1Till tableParseColumn newline - -tableParseColumn :: PandocMonad m => TWParser m B.Blocks -tableParseColumn = char '|' *> skipSpaces *> - tableColumnContent (skipSpaces >> char '|') - <* skipSpaces <* optional tableEndOfRow - -tableEndOfRow :: PandocMonad m => TWParser m Char -tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|' - -tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks -tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat - where - content = continuation <|> inline - continuation = try $ char '\\' >> newline >> return mempty - -blockQuote :: PandocMonad m => TWParser m B.Blocks -blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat - -noautolink :: PandocMonad m => TWParser m B.Blocks -noautolink = do - (_, content) <- htmlElement "noautolink" - st <- getState - setState $ st{ stateAllowLinks = False } - blocks <- try $ parseContent content - setState $ st{ stateAllowLinks = True } - return $ mconcat blocks - where - parseContent = parseFromString $ many $ block - -para :: PandocMonad m => TWParser m B.Blocks -para = many1Till inline endOfParaElement >>= return . result . mconcat - where - endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement - endOfInput = try $ skipMany blankline >> skipSpaces >> eof - endOfPara = try $ blankline >> skipMany1 blankline - newBlockElement = try $ blankline >> skip blockElements - result content = if F.all (==Space) content - then mempty - else B.para $ B.trimInlines content - - --- --- inline parsers --- - -inline :: PandocMonad m => TWParser m B.Inlines -inline = choice [ whitespace - , br - , macro - , strong - , strongHtml - , strongAndEmph - , emph - , emphHtml - , boldCode - , smart - , link - , htmlComment - , code - , codeHtml - , nop - , autoLink - , str - , symbol - ] <?> "inline" - -whitespace :: PandocMonad m => TWParser m B.Inlines -whitespace = (lb <|> regsp) >>= return - where lb = try $ skipMany spaceChar >> linebreak >> return B.space - regsp = try $ skipMany1 spaceChar >> return B.space - -br :: PandocMonad m => TWParser m B.Inlines -br = try $ string "%BR%" >> return B.linebreak - -linebreak :: PandocMonad m => TWParser m B.Inlines -linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) - where lastNewline = eof >> return mempty - innerNewline = return B.space - -between :: (Monoid c, PandocMonad m) - => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) - -> TWParser m c -between start end p = - mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) - -enclosed :: (Monoid b, PandocMonad m) - => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b -enclosed sep p = between sep (try $ sep <* endMarker) p - where - endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof - endSpace = (spaceChar <|> newline) >> return B.space - -macro :: PandocMonad m => TWParser m B.Inlines -macro = macroWithParameters <|> withoutParameters - where - withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan - emptySpan name = buildSpan name [] mempty - -macroWithParameters :: PandocMonad m => TWParser m B.Inlines -macroWithParameters = try $ do - char '%' - name <- macroName - (content, kvs) <- attributes - char '%' - return $ buildSpan name kvs $ B.str content - -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines -buildSpan className kvs = B.spanWith attrs - where - attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs - kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] - -macroName :: PandocMonad m => TWParser m String -macroName = do - first <- letter - rest <- many $ alphaNum <|> char '_' - return (first:rest) - -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>= - return . foldr (either mkContent mkKvs) ([], []) - where - spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) - mkKvs kv (cont, rest) = (cont, (kv : rest)) - -attribute :: PandocMonad m => TWParser m (Either String (String, String)) -attribute = withKey <|> withoutKey - where - withKey = try $ do - key <- macroName - char '=' - parseValue False >>= return . (curry Right key) - withoutKey = try $ parseValue True >>= return . Left - parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) - withoutQuotes allowSpaces - | allowSpaces == True = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" - -nestedInlines :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m B.Inlines -nestedInlines end = innerSpace <|> nestedInline - where - innerSpace = try $ whitespace <* (notFollowedBy end) - nestedInline = notFollowedBy whitespace >> nested inline - -strong :: PandocMonad m => TWParser m B.Inlines -strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong - -strongHtml :: PandocMonad m => TWParser m B.Inlines -strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline) - >>= return . B.strong . mconcat - -strongAndEmph :: PandocMonad m => TWParser m B.Inlines -strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong - -emph :: PandocMonad m => TWParser m B.Inlines -emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph - -emphHtml :: PandocMonad m => TWParser m B.Inlines -emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) - >>= return . B.emph . mconcat - -nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> (count 1 nonspaceChar) - where - innerSpace = try $ many1 spaceChar <* notFollowedBy end - -boldCode :: PandocMonad m => TWParser m B.Inlines -boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities - -htmlComment :: PandocMonad m => TWParser m B.Inlines -htmlComment = htmlTag isCommentTag >> return mempty - -code :: PandocMonad m => TWParser m B.Inlines -code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities - -codeHtml :: PandocMonad m => TWParser m B.Inlines -codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar - return $ B.codeWith attrs $ fromEntities content - -autoLink :: PandocMonad m => TWParser m B.Inlines -autoLink = try $ do - state <- getState - guard $ stateAllowLinks state - (text, url) <- parseLink - guard $ checkLink (head $ reverse url) - return $ makeLink (text, url) - where - parseLink = notFollowedBy nop >> (uri <|> emailAddress) - makeLink (text, url) = B.link url "" $ B.str text - checkLink c - | c == '/' = True - | otherwise = isAlphaNum c - -str :: PandocMonad m => TWParser m B.Inlines -str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str - -nop :: PandocMonad m => TWParser m B.Inlines -nop = try $ (skip exclamation <|> skip nopTag) >> followContent - where - exclamation = char '!' - nopTag = stringAnyCase "<nop>" - followContent = many1 nonspaceChar >>= return . B.str . fromEntities - -symbol :: PandocMonad m => TWParser m B.Inlines -symbol = count 1 nonspaceChar >>= return . B.str - -smart :: PandocMonad m => TWParser m B.Inlines -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice [ apostrophe - , dash - , ellipses - ] - -singleQuoted :: PandocMonad m => TWParser m B.Inlines -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ - many1Till inline singleQuoteEnd >>= - (return . B.singleQuoted . B.trimInlines . mconcat) - -doubleQuoted :: PandocMonad m => TWParser m B.Inlines -doubleQuoted = try $ do - doubleQuoteStart - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> - return (B.doubleQuoted $ B.trimInlines contents)) - <|> (return $ (B.str "\8220") B.<> contents) - -link :: PandocMonad m => TWParser m B.Inlines -link = try $ do - st <- getState - guard $ stateAllowLinks st - setState $ st{ stateAllowLinks = False } - (url, title, content) <- linkText - setState $ st{ stateAllowLinks = True } - return $ B.link url title content - -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) -linkText = do - string "[[" - url <- many1Till anyChar (char ']') - content <- option [B.str url] linkContent - char ']' - return (url, "", mconcat content) - where - linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent - parseLinkContent = parseFromString $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs deleted file mode 100644 index 6594b9ab8..000000000 --- a/src/Text/Pandoc/Readers/Textile.hs +++ /dev/null @@ -1,729 +0,0 @@ -{- -Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' - and John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : Paul Rivier <paul*rivier#demotera*com> - Stability : alpha - Portability : portable - -Conversion from Textile to 'Pandoc' document, based on the spec -available at http://redcloth.org/textile. - -Implemented and parsed: - - Paragraphs - - Code blocks - - Lists - - blockquote - - Inlines : strong, emph, cite, code, deleted, superscript, - subscript, links - - footnotes - - HTML-specific and CSS-specific attributes on headers - -Left to be implemented: - - dimension sign - - all caps - - continued blocks (ex bq..) - -TODO : refactor common patterns across readers : - - more ... - --} - - -module Text.Pandoc.Readers.Textile ( readTextile) where -import Text.Pandoc.CSS -import Text.Pandoc.Definition -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Options -import Text.Pandoc.Parsing -import Text.Pandoc.Logging -import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag ) -import Text.Pandoc.Shared (trim) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.HTML.TagSoup (fromAttrib, Tag(..)) -import Text.HTML.TagSoup.Match -import Data.List ( intercalate, transpose, intersperse ) -import Data.Char ( digitToInt, isUpper ) -import Control.Monad ( guard, liftM ) -import Data.Monoid ((<>)) -import Text.Pandoc.Class (PandocMonad, report) -import Control.Monad.Except (throwError) - --- | Parse a Textile text and return a Pandoc document. -readTextile :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> m Pandoc -readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n") - case parsed of - Right result -> return result - Left e -> throwError e - - --- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc -parseTextile = do - many blankline - startPos <- getPosition - -- go through once just to get list of reference keys and notes - -- docMinusKeys is the raw document with blanks where the keys/notes were... - let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . concat - setPosition startPos - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - -- now parse it for real... - blocks <- parseBlocks - return $ Pandoc nullMeta (B.toList blocks) -- FIXME - -noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') - -noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteBlock = try $ do - startPos <- getPosition - ref <- noteMarker - optional blankline - contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock) - endPos <- getPosition - let newnote = (ref, contents ++ "\n") - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - --- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks -parseBlocks = mconcat <$> manyTill block eof - --- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] -blockParsers = [ codeBlock - , header - , blockQuote - , hrule - , commentBlock - , anyList - , rawHtmlBlock - , rawLaTeXBlock' - , table - , maybeExplicitBlock "p" para - , mempty <$ blanklines - ] - --- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT [Char] ParserState m Blocks -block = do - res <- choice blockParsers <?> "block" - pos <- getPosition - report $ ParsingTrace (take 60 $ show $ B.toList res) pos - return res - -commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks -commentBlock = try $ do - string "###." - manyTill anyLine blanklines - return mempty - -codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks -codeBlock = codeBlockBc <|> codeBlockPre - -codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks -codeBlockBc = try $ do - string "bc." - extended <- option False (True <$ char '.') - char ' ' - let starts = ["p", "table", "bq", "bc", "h1", "h2", "h3", - "h4", "h5", "h6", "pre", "###", "notextile"] - let ender = choice $ map explicitBlockStart starts - contents <- if extended - then do - f <- anyLine - rest <- many (notFollowedBy ender *> anyLine) - return (f:rest) - else manyTill anyLine blanklines - return $ B.codeBlock (trimTrailingNewlines (unlines contents)) - -trimTrailingNewlines :: String -> String -trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse - --- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks -codeBlockPre = try $ do - (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) - optional blanklines - -- drop leading newline if any - let result'' = case result' of - '\n':xs -> xs - _ -> result' - -- drop trailing newline if any - let result''' = case reverse result'' of - '\n':_ -> init result'' - _ -> result'' - let classes = words $ fromAttrib "class" t - let ident = fromAttrib "id" t - let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.codeBlockWith (ident,classes,kvs) result''' - --- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT [Char] ParserState m Blocks -header = try $ do - char 'h' - level <- digitToInt <$> oneOf "123456" - attr <- attributes - char '.' - lookAhead whitespace - name <- trimInlines . mconcat <$> many inline - attr' <- registerHeader attr name - return $ B.headerWith attr' level name - --- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks -blockQuote = try $ do - string "bq" >> attributes >> char '.' >> whitespace - B.blockQuote <$> para - --- Horizontal rule - -hrule :: PandocMonad m => ParserT [Char] st m Blocks -hrule = try $ do - skipSpaces - start <- oneOf "-*" - count 2 (skipSpaces >> char start) - skipMany (spaceChar <|> char start) - newline - optional blanklines - return B.horizontalRule - --- Lists handling - --- | Can be a bullet list or an ordered list. This implementation is --- strict in the nesting, sublist must start at exactly "parent depth --- plus one" -anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks -anyList = try $ anyListAtDepth 1 <* blanklines - --- | This allow one type of list to be nested into an other type, --- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks -anyListAtDepth depth = choice [ bulletListAtDepth depth, - orderedListAtDepth depth, - definitionList ] - --- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks -bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) - --- | Bullet List Item of given depth, depth being the number of --- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks -bulletListItemAtDepth = genericListItemAtDepth '*' - --- | Ordered List of given depth, depth being the number of --- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks -orderedListAtDepth depth = try $ do - items <- many1 (orderedListItemAtDepth depth) - return $ B.orderedList items - --- | Ordered List Item of given depth, depth being the number of --- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks -orderedListItemAtDepth = genericListItemAtDepth '#' - --- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks -genericListItemAtDepth c depth = try $ do - count depth (char c) >> attributes >> whitespace - p <- mconcat <$> many listInline - newline - sublist <- option mempty (anyListAtDepth (depth + 1)) - return $ (B.plain p) <> sublist - --- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks -definitionList = try $ B.definitionList <$> many1 definitionListItem - --- | List start character. -listStart :: PandocMonad m => ParserT [Char] ParserState m () -listStart = genericListStart '*' - <|> () <$ genericListStart '#' - <|> () <$ definitionListStart - -genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () -genericListStart c = () <$ try (many1 (char c) >> whitespace) - -basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () -basicDLStart = do - char '-' - whitespace - notFollowedBy newline - -definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines -definitionListStart = try $ do - basicDLStart - trimInlines . mconcat <$> - many1Till inline - ( try (newline *> lookAhead basicDLStart) - <|> try (lookAhead (() <$ string ":=")) - ) - -listInline :: PandocMonad m => ParserT [Char] ParserState m Inlines -listInline = try (notFollowedBy newline >> inline) - <|> try (endline <* notFollowedBy listStart) - --- | A definition list item in textile begins with '- ', followed by --- the term defined, then spaces and ":=". The definition follows, on --- the same single line, or spaned on multiple line, after a line --- break. -definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) -definitionListItem = try $ do - term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart - def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) - return (term, def') - where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] - inlineDef = liftM (\d -> [B.plain d]) - $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline - multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] - multilineDef = try $ do - optional whitespace >> newline - s <- many1Till anyChar (try (string "=:" >> newline)) - -- this ++ "\n\n" does not look very good - ds <- parseFromString parseBlocks (s ++ "\n\n") - return [ds] - --- raw content - --- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks -rawHtmlBlock = try $ do - skipMany spaceChar - (_,b) <- htmlTag isBlockTag - optional blanklines - return $ B.rawBlock "html" b - --- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks -rawLaTeXBlock' = do - guardEnabled Ext_raw_tex - B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) - - --- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT [Char] ParserState m Blocks -para = B.para . trimInlines . mconcat <$> many1 inline - --- Tables - -toAlignment :: Char -> Alignment -toAlignment '<' = AlignLeft -toAlignment '>' = AlignRight -toAlignment '=' = AlignCenter -toAlignment _ = AlignDefault - -cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) -cellAttributes = try $ do - isHeader <- option False (True <$ char '_') - -- we just ignore colspan and rowspan markers: - optional $ try $ oneOf "/\\" >> many1 digit - -- we pay attention to alignments: - alignment <- option AlignDefault $ toAlignment <$> oneOf "<>=" - -- ignore other attributes for now: - _ <- attributes - char '.' - return (isHeader, alignment) - --- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) -tableCell = try $ do - char '|' - (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes - notFollowedBy blankline - raw <- trim <$> - many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) - content <- mconcat <$> parseFromString (many inline) raw - return ((isHeader, alignment), B.plain content) - --- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] -tableRow = try $ do - -- skip optional row attributes - optional $ try $ do - _ <- attributes - char '.' - many1 spaceChar - many1 tableCell <* char '|' <* blankline - --- | A table with an optional header. -table :: PandocMonad m => ParserT [Char] ParserState m Blocks -table = try $ do - -- ignore table attributes - caption <- option mempty $ try $ do - string "table" - _ <- attributes - char '.' - rawcapt <- trim <$> anyLine - parseFromString (mconcat <$> many inline) rawcapt - rawrows <- many1 $ (skipMany ignorableRow) >> tableRow - skipMany ignorableRow - blanklines - let (headers, rows) = case rawrows of - (toprow:rest) | any (fst . fst) toprow -> - (toprow, rest) - _ -> (mempty, rawrows) - let nbOfCols = max (length headers) (length $ head rows) - let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows) - return $ B.table caption - (zip aligns (replicate nbOfCols 0.0)) - (map snd headers) - (map (map snd) rows) - --- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () -ignorableRow = try $ do - char '|' - oneOf ":^-~" - _ <- attributes - char '.' - _ <- anyLine - return () - -explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () -explicitBlockStart name = try $ do - string name - attributes - char '.' - optional whitespace - optional endline - --- | Blocks like 'p' and 'table' do not need explicit block tag. --- However, they can be used to set HTML/CSS attributes when needed. -maybeExplicitBlock :: PandocMonad m - => String -- ^ block tag name - -> ParserT [Char] ParserState m Blocks -- ^ implicit block - -> ParserT [Char] ParserState m Blocks -maybeExplicitBlock name blk = try $ do - optional $ explicitBlockStart name - blk - - - ----------- --- Inlines ----------- - - --- | Any inline element -inline :: PandocMonad m => ParserT [Char] ParserState m Inlines -inline = do - choice inlineParsers <?> "inline" - --- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] -inlineParsers = [ str - , whitespace - , endline - , code - , escapedInline - , inlineMarkup - , groupedInlineMarkup - , rawHtmlInline - , rawLaTeXInline' - , note - , link - , image - , mark - , (B.str . (:[])) <$> characterReference - , smartPunctuation inline - , symbol - ] - --- | Inline markups -inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines -inlineMarkup = choice [ simpleInline (string "??") (B.cite []) - , simpleInline (string "**") B.strong - , simpleInline (string "__") B.emph - , simpleInline (char '*') B.strong - , simpleInline (char '_') B.emph - , simpleInline (char '+') B.emph -- approximates underline - , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout - , simpleInline (char '^') B.superscript - , simpleInline (char '~') B.subscript - , simpleInline (char '%') id - ] - --- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT [Char] st m Inlines -mark = try $ char '(' >> (try tm <|> try reg <|> copy) - -reg :: PandocMonad m => ParserT [Char] st m Inlines -reg = do - oneOf "Rr" - char ')' - return $ B.str "\174" - -tm :: PandocMonad m => ParserT [Char] st m Inlines -tm = do - oneOf "Tt" - oneOf "Mm" - char ')' - return $ B.str "\8482" - -copy :: PandocMonad m => ParserT [Char] st m Inlines -copy = do - oneOf "Cc" - char ')' - return $ B.str "\169" - -note :: PandocMonad m => ParserT [Char] ParserState m Inlines -note = try $ do - ref <- (char '[' *> many1 digit <* char ']') - notes <- stateNotes <$> getState - case lookup ref notes of - Nothing -> fail "note not found" - Just raw -> B.note <$> parseFromString parseBlocks raw - --- | Special chars -markupChars :: [Char] -markupChars = "\\*#_@~-+^|%=[]&" - --- | Break strings on following chars. Space tab and newline break for --- inlines breaking. Open paren breaks for mark. Quote, dash and dot --- break for smart punctuation. Punctuation breaks for regular --- punctuation. Double quote breaks for named links. > and < break --- for inline html. -stringBreakers :: [Char] -stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]" - -wordBoundaries :: [Char] -wordBoundaries = markupChars ++ stringBreakers - --- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String -hyphenedWords = do - x <- wordChunk - xs <- many (try $ char '-' >> wordChunk) - return $ intercalate "-" (x:xs) - -wordChunk :: PandocMonad m => ParserT [Char] ParserState m String -wordChunk = try $ do - hd <- noneOf wordBoundaries - tl <- many ( (noneOf wordBoundaries) <|> - try (notFollowedBy' note *> oneOf markupChars - <* lookAhead (noneOf wordBoundaries) ) ) - return $ hd:tl - --- | Any string -str :: PandocMonad m => ParserT [Char] ParserState m Inlines -str = do - baseStr <- hyphenedWords - -- RedCloth compliance : if parsed word is uppercase and immediatly - -- followed by parens, parens content is unconditionally word acronym - fullStr <- option baseStr $ try $ do - guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar' - return $ concat [baseStr, " (", acro, ")"] - updateLastStrPos - return $ B.str fullStr - --- | Some number of space chars -whitespace :: PandocMonad m => ParserT [Char] st m Inlines -whitespace = many1 spaceChar >> return B.space <?> "whitespace" - --- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT [Char] ParserState m Inlines -endline = try $ do - newline - notFollowedBy blankline - notFollowedBy listStart - notFollowedBy rawHtmlBlock - return B.linebreak - -rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines -rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag - --- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines -rawLaTeXInline' = try $ do - guardEnabled Ext_raw_tex - B.singleton <$> rawLaTeXInline - --- | Textile standard link syntax is "label":target. But we --- can also have ["label":target]. -link :: PandocMonad m => ParserT [Char] ParserState m Inlines -link = try $ do - bracketed <- (True <$ char '[') <|> return False - char '"' *> notFollowedBy (oneOf " \t\n\r") - attr <- attributes - name <- trimInlines . mconcat <$> - withQuoteContext InDoubleQuote (many1Till inline (char '"')) - char ':' - let stop = if bracketed - then char ']' - else lookAhead $ space <|> - try (oneOf "!.,;:" *> (space <|> newline)) - url <- many1Till nonspaceChar stop - let name' = if B.toList name == [Str "$"] then B.str url else name - return $ if attr == nullAttr - then B.link url "" name' - else B.spanWith attr $ B.link url "" name' - --- | image embedding -image :: PandocMonad m => ParserT [Char] ParserState m Inlines -image = try $ do - char '!' >> notFollowedBy space - (ident, cls, kvs) <- attributes - let attr = case lookup "style" kvs of - Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) - Nothing -> (ident, cls, kvs) - src <- many1 (noneOf " \t\n\r!(") - alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')') - char '!' - return $ B.imageWith attr src alt (B.str alt) - -escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedInline = escapedEqs <|> escapedTag - -escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedEqs = B.str <$> - (try $ string "==" *> manyTill anyChar' (try $ string "==")) - --- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedTag = B.str <$> - (try $ string "<notextile>" *> - manyTill anyChar' (try $ string "</notextile>")) - --- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines -symbol = B.str . singleton <$> (notFollowedBy newline *> - notFollowedBy rawHtmlBlock *> - oneOf wordBoundaries) - --- | Inline code -code :: PandocMonad m => ParserT [Char] ParserState m Inlines -code = code1 <|> code2 - --- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char -anyChar' = - satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline) - -code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code1 = B.code <$> surrounded (char '@') anyChar' - -code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code2 = do - htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) - --- | Html / CSS attributes -attributes :: PandocMonad m => ParserT [Char] ParserState m Attr -attributes = (foldl (flip ($)) ("",[],[])) <$> - try (do special <- option id specialAttribute - attrs <- many attribute - return (special : attrs)) - -specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) -specialAttribute = do - alignStr <- ("center" <$ char '=') <|> - ("justify" <$ try (string "<>")) <|> - ("right" <$ char '>') <|> - ("left" <$ char '<') - notFollowedBy spaceChar - return $ addStyle ("text-align:" ++ alignStr) - -attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) -attribute = try $ - (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar - -classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) -classIdAttr = try $ do -- (class class #id) - char '(' - ws <- words `fmap` manyTill anyChar' (char ')') - case reverse ws of - [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) - (('#':ident'):classes') -> return $ \(_,_,keyvals) -> - (ident',classes',keyvals) - classes' -> return $ \(_,_,keyvals) -> - ("",classes',keyvals) - -styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) -styleAttr = do - style <- try $ enclosed (char '{') (char '}') anyChar' - return $ addStyle style - -addStyle :: String -> Attr -> Attr -addStyle style (id',classes,keyvals) = - (id',classes,keyvals') - where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] - style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] - -langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) -langAttr = do - lang <- try $ enclosed (char '[') (char ']') alphaNum - return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) - --- | Parses material surrounded by a parser. -surrounded :: PandocMonad m - => ParserT [Char] st m t -- ^ surrounding parser - -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) - -> ParserT [Char] st m [a] -surrounded border = - enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) - -simpleInline :: PandocMonad m - => ParserT [Char] ParserState m t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = try $ do - notAfterString - border *> notFollowedBy (oneOf " \t\n\r") - attr <- attributes - body <- trimInlines . mconcat <$> - withQuoteContext InSingleQuote - (manyTill (notFollowedBy newline >> inline) - (try border <* notFollowedBy alphaNum)) - return $ construct $ - if attr == nullAttr - then body - else B.spanWith attr body - -groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines -groupedInlineMarkup = try $ do - char '[' - sp1 <- option mempty $ B.space <$ whitespace - result <- withQuoteContext InSingleQuote inlineMarkup - sp2 <- option mempty $ B.space <$ whitespace - char ']' - return $ sp1 <> result <> sp2 - --- | Create a singleton list -singleton :: a -> [a] -singleton x = [x] - diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs deleted file mode 100644 index 9e2b6963d..000000000 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ /dev/null @@ -1,596 +0,0 @@ -{-# LANGUAGE ViewPatterns #-} -{- -Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Txt2Tags - Copyright : Copyright (C) 2014 Matthew Pickering - License : GNU GPL, version 2 or above - - Maintainer : Matthew Pickering <matthewtpickering@gmail.com> - -Conversion of txt2tags formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags - , getT2TMeta - , T2TMeta (..) - ) - where - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Data.Monoid ((<>)) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI,compactify, compactifyDL) -import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) -import Data.Char (toLower) -import Data.List (transpose, intersperse, intercalate) -import Data.Maybe (fromMaybe) ---import Network.URI (isURI) -- Not sure whether to use this function -import Control.Monad (void, guard, when) -import Data.Default -import Control.Monad.Reader (Reader, runReader, asks) - -import Data.Time.Format (formatTime) -import Text.Pandoc.Compat.Time (defaultTimeLocale) -import Control.Monad.Except (throwError, catchError) -import Text.Pandoc.Class (PandocMonad) -import qualified Text.Pandoc.Class as P - -type T2T = ParserT String ParserState (Reader T2TMeta) - --- | An object for the T2T macros meta information --- the contents of each field is simply substituted verbatim into the file -data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile - , infile :: FilePath -- ^ Input file - , outfile :: FilePath -- ^ Output file - } deriving Show - -instance Default T2TMeta where - def = T2TMeta "" "" "" "" - --- | Get the meta information required by Txt2Tags macros -getT2TMeta :: PandocMonad m => m T2TMeta -getT2TMeta = do - mbInps <- P.getInputFiles - let inps = case mbInps of - Just x -> x - Nothing -> [] - mbOutp <- P.getOutputFile - let outp = case mbOutp of - Just x -> x - Nothing -> "" - curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime - let getModTime = fmap (formatTime defaultTimeLocale "%T") . - P.getModificationTime - curMtime <- case inps of - [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime - _ -> catchError - (maximum <$> mapM getModTime inps) - (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) outp - --- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: PandocMonad m - => ReaderOptions - -> String - -> m Pandoc -readTxt2Tags opts s = do - meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") - case parsed of - Right result -> return $ result - Left e -> throwError e - --- | Read Txt2Tags (ignoring all macros) from an input string returning --- a Pandoc document --- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc --- readTxt2TagsNoMacros = readTxt2Tags - -parseT2T :: T2T Pandoc -parseT2T = do - -- Parse header if standalone flag is set - standalone <- getOption readerStandalone - when standalone parseHeader - body <- mconcat <$> manyTill block eof - meta' <- stateMeta <$> getState - return $ Pandoc meta' (B.toList body) - -parseHeader :: T2T () -parseHeader = do - () <$ try blankline <|> header - meta <- stateMeta <$> getState - optional blanklines - config <- manyTill setting (notFollowedBy setting) - -- TODO: Handle settings better - let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) meta config - updateState (\s -> s {stateMeta = settings}) <* optional blanklines - -header :: T2T () -header = titleline >> authorline >> dateline - -headerline :: B.ToMetaValue a => String -> T2T a -> T2T () -headerline field p = (() <$ try blankline) - <|> (p >>= updateState . B.setMeta field) - -titleline :: T2T () -titleline = - headerline "title" (trimInlines . mconcat <$> manyTill inline newline) - -authorline :: T2T () -authorline = - headerline "author" (sepBy author (char ';') <* newline) - where - author = trimInlines . mconcat <$> many (notFollowedBy (char ';' <|> newline) >> inline) - -dateline :: T2T () -dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) - -type Keyword = String -type Value = String - -setting :: T2T (Keyword, Value) -setting = do - string "%!" - keyword <- ignoreSpacesCap (many1 alphaNum) - char ':' - value <- ignoreSpacesCap (manyTill anyChar (newline)) - return (keyword, value) - --- Blocks - -parseBlocks :: T2T Blocks -parseBlocks = mconcat <$> manyTill block eof - -block :: T2T Blocks -block = do - choice - [ mempty <$ blanklines - , quote - , hrule -- hrule must go above title - , title - , commentBlock - , verbatim - , rawBlock - , taggedBlock - , list - , table - , para - ] - -title :: T2T Blocks -title = try $ balancedTitle '+' <|> balancedTitle '=' - -balancedTitle :: Char -> T2T Blocks -balancedTitle c = try $ do - spaces - level <- length <$> many1 (char c) - guard (level <= 5) -- Max header level 5 - heading <- manyTill (noneOf "\n\r") (count level (char c)) - label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) - many spaceChar *> newline - let attr = maybe nullAttr (\x -> (x, [], [])) label - return $ B.headerWith attr level (trimInlines $ B.text heading) - -para :: T2T Blocks -para = try $ do - ils <- parseInlines - nl <- option False (True <$ newline) - option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils)) - where - listStart = try bulletListStart <|> orderedListStart - -commentBlock :: T2T Blocks -commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment - --- Seperator and Strong line treated the same -hrule :: T2T Blocks -hrule = try $ do - spaces - line <- many1 (oneOf "=-_") - guard (length line >= 20) - B.horizontalRule <$ blankline - -quote :: T2T Blocks -quote = try $ do - lookAhead tab - rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") - return $ B.blockQuote contents - -commentLine :: T2T Inlines -commentLine = comment - --- List Parsing code from Org Reader - -list :: T2T Blocks -list = choice [bulletList, orderedList, definitionList] - -bulletList :: T2T Blocks -bulletList = B.bulletList . compactify - <$> many1 (listItem bulletListStart parseBlocks) - -orderedList :: T2T Blocks -orderedList = B.orderedList . compactify - <$> many1 (listItem orderedListStart parseBlocks) - -definitionList :: T2T Blocks -definitionList = try $ do - B.definitionList . compactifyDL <$> - many1 (listItem definitionListStart definitionListEnd) - -definitionListEnd :: T2T (Inlines, [Blocks]) -definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks) - -genericListStart :: T2T Char - -> T2T Int -genericListStart listMarker = try $ - (2+) <$> (length <$> many spaceChar - <* listMarker <* space <* notFollowedBy space) - --- parses bullet list \start and returns its length (excl. following whitespace) -bulletListStart :: T2T Int -bulletListStart = genericListStart (char '-') - -orderedListStart :: T2T Int -orderedListStart = genericListStart (char '+' ) - -definitionListStart :: T2T Int -definitionListStart = genericListStart (char ':') - --- parse raw text for one list item, excluding start marker and continuations -listItem :: T2T Int - -> T2T a - -> T2T a -listItem start end = try $ do - markerLength <- try start - firstLine <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString end $ firstLine ++ blank ++ rest - --- continuation of a list item - indented and separated by blankline or endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Int - -> T2T String -listContinuation markerLength = try $ - notFollowedBy' (blankline >> blankline) - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) - where listLine = try $ indentWith markerLength *> anyLineNewline - -anyLineNewline :: T2T String -anyLineNewline = (++ "\n") <$> anyLine - -indentWith :: Int -> T2T String -indentWith n = count n space - --- Table - -table :: T2T Blocks -table = try $ do - tableHeader <- fmap snd <$> option mempty (try headerRow) - rows <- many1 (many commentLine *> tableRow) - let columns = transpose rows - let ncolumns = length columns - let aligns = map (foldr1 findAlign) (map (map fst) columns) - let rows' = map (map snd) rows - let size = maximum (map length rows') - let rowsPadded = map (pad size) rows' - let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty - return $ B.table mempty - (zip aligns (replicate ncolumns 0.0)) - headerPadded rowsPadded - -pad :: (Monoid a) => Int -> [a] -> [a] -pad n xs = xs ++ (replicate (n - length xs) mempty) - - -findAlign :: Alignment -> Alignment -> Alignment -findAlign x y - | x == y = x - | otherwise = AlignDefault - -headerRow :: T2T [(Alignment, Blocks)] -headerRow = genericRow (string "||") - -tableRow :: T2T [(Alignment, Blocks)] -tableRow = genericRow (char '|') - -genericRow :: T2T a -> T2T [(Alignment, Blocks)] -genericRow start = try $ do - spaces *> start - manyTill tableCell newline <?> "genericRow" - - -tableCell :: T2T (Alignment, Blocks) -tableCell = try $ do - leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead - content <- (manyTill inline (try $ lookAhead (cellEnd))) - rightSpaces <- length <$> many space - let align = - case compare leftSpaces rightSpaces of - LT -> AlignLeft - EQ -> AlignCenter - GT -> AlignRight - endOfCell - return $ (align, B.plain (B.trimInlines $ mconcat content)) - where - cellEnd = (void newline <|> (many1 space *> endOfCell)) - -endOfCell :: T2T () -endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline) - --- Raw area - -verbatim :: T2T Blocks -verbatim = genericBlock anyLineNewline B.codeBlock "```" - -rawBlock :: T2T Blocks -rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\"" - -taggedBlock :: T2T Blocks -taggedBlock = do - target <- getTarget - genericBlock anyLineNewline (B.rawBlock target) "'''" - --- Generic - -genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks -genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s - -blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupArea p f s = try $ (do - string s *> blankline - f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline)))) - -blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupLine p f s = try (f <$> (string s *> space *> p)) - --- Can be in either block or inline position -comment :: Monoid a => T2T a -comment = try $ do - atStart - notFollowedBy macro - mempty <$ (char '%' *> anyLine) - --- Inline - -parseInlines :: T2T Inlines -parseInlines = trimInlines . mconcat <$> many1 inline - -inline :: T2T Inlines -inline = do - choice - [ endline - , macro - , commentLine - , whitespace - , url - , link - , image - , bold - , underline - , code - , raw - , tagged - , strike - , italic - , code - , str - , symbol - ] - -bold :: T2T Inlines -bold = inlineMarkup inline B.strong '*' (B.str) - -underline :: T2T Inlines -underline = inlineMarkup inline B.emph '_' (B.str) - -strike :: T2T Inlines -strike = inlineMarkup inline B.strikeout '-' (B.str) - -italic :: T2T Inlines -italic = inlineMarkup inline B.emph '/' (B.str) - -code :: T2T Inlines -code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id - -raw :: T2T Inlines -raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id - -tagged :: T2T Inlines -tagged = do - target <- getTarget - inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id - --- Parser for markup indicated by a double character. --- Inline markup is greedy and glued --- Greedy meaning ***a*** = Bold [Str "*a*"] --- Glued meaning that markup must be tight to content --- Markup can't pass newlines -inlineMarkup :: Monoid a - => (T2T a) -- Content parser - -> (a -> Inlines) -- Constructor - -> Char -- Fence - -> (String -> a) -- Special Case to handle ****** - -> T2T Inlines -inlineMarkup p f c special = try $ do - start <- many1 (char c) - let l = length start - guard (l >= 2) - when (l == 2) (void $ notFollowedBy space) - -- We must make sure that there is no space before the start of the - -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") $ - (try $ lookAhead (noneOf " " >> string [c,c] ))) - case body of - Just middle -> do - lastChar <- anyChar - end <- many1 (char c) - let parser inp = parseFromString (mconcat <$> many p) inp - let start' = case drop 2 start of - "" -> mempty - xs -> special xs - body' <- parser (middle ++ [lastChar]) - let end' = case drop 2 end of - "" -> mempty - xs -> special xs - return $ f (start' <> body' <> end') - Nothing -> do -- Either bad or case such as ***** - guard (l >= 5) - let body' = (replicate (l - 4) c) - return $ f (special body') - -link :: T2T Inlines -link = try imageLink <|> titleLink - --- Link with title -titleLink :: T2T Inlines -titleLink = try $ do - char '[' - notFollowedBy space - tokens <- sepBy1 (many $ noneOf " ]") space - guard (length tokens >= 2) - char ']' - let link' = last tokens - guard (length link' > 0) - let tit = concat (intersperse " " (init tokens)) - return $ B.link link' "" (B.text tit) - --- Link with image -imageLink :: T2T Inlines -imageLink = try $ do - char '[' - body <- image - many1 space - l <- manyTill (noneOf "\n\r ") (char ']') - return (B.link l "" body) - -macro :: T2T Inlines -macro = try $ do - name <- string "%%" *> oneOfStringsCI (map fst commands) - optional (try $ enclosed (char '(') (char ')') anyChar) - lookAhead (spaceChar <|> oneOf specialChars <|> newline) - maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) - where - commands = [ ("date", date), ("mtime", mtime) - , ("infile", infile), ("outfile", outfile)] - --- raw URLs in text are automatically linked -url :: T2T Inlines -url = try $ do - (rawUrl, escapedUrl) <- (try uri <|> emailAddress) - return $ B.link rawUrl "" (B.str escapedUrl) - -uri :: T2T (String, String) -uri = try $ do - address <- t2tURI - return (address, escapeURI address) - --- The definition of a URI in the T2T source differs from the --- actual definition. This is a transcription of the definition in --- the source of v2.6 ---isT2TURI :: String -> Bool ---isT2TURI (parse t2tURI "" -> Right _) = True ---isT2TURI _ = False - -t2tURI :: T2T String -t2tURI = do - start <- try ((++) <$> proto <*> urlLogin) <|> guess - domain <- many1 chars - sep <- many (char '/') - form' <- option mempty ((:) <$> char '?' <*> many1 form) - anchor' <- option mempty ((:) <$> char '#' <*> many anchor) - return (start ++ domain ++ sep ++ form' ++ anchor') - where - protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] - proto = (++) <$> oneOfStrings protos <*> string "://" - guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) - <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') - login = alphaNum <|> oneOf "_.-" - pass = many (noneOf " @") - chars = alphaNum <|> oneOf "%._/~:,=$@&+-" - anchor = alphaNum <|> oneOf "%._0" - form = chars <|> oneOf ";*" - urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') - - -image :: T2T Inlines -image = try $ do - -- List taken from txt2tags source - let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] - char '[' - path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions)) - ext <- oneOfStrings extensions - char ']' - return $ B.image (path ++ ext) "" mempty - --- Characters used in markup -specialChars :: String -specialChars = "%*-_/|:+;" - -tab :: T2T Char -tab = char '\t' - -space :: T2T Char -space = char ' ' - -spaces :: T2T String -spaces = many space - -endline :: T2T Inlines -endline = try $ do - newline - notFollowedBy blankline - notFollowedBy hrule - notFollowedBy title - notFollowedBy verbatim - notFollowedBy rawBlock - notFollowedBy taggedBlock - notFollowedBy quote - notFollowedBy list - notFollowedBy table - return $ B.softbreak - -str :: T2T Inlines -str = try $ do - B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - -whitespace :: T2T Inlines -whitespace = try $ B.space <$ spaceChar - -symbol :: T2T Inlines -symbol = B.str . (:[]) <$> oneOf specialChars - --- Utility - -getTarget :: T2T String -getTarget = do - mv <- lookupMeta "target" . stateMeta <$> getState - let MetaString target = fromMaybe (MetaString "html") mv - return target - -atStart :: T2T () -atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) - -ignoreSpacesCap :: T2T String -> T2T String -ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs deleted file mode 100644 index 85b298a85..000000000 --- a/src/Text/Pandoc/SelfContained.hs +++ /dev/null @@ -1,181 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for converting an HTML file into one that can be viewed -offline, by incorporating linked images, CSS, and scripts into -the HTML using data URIs. --} -module Text.Pandoc.SelfContained ( makeSelfContained ) where -import Text.HTML.TagSoup -import Network.URI (isURI, escapeURIString, URI(..), parseURI) -import Data.ByteString.Base64 -import qualified Data.ByteString.Char8 as B -import Data.ByteString (ByteString) -import System.FilePath (takeExtension, takeDirectory, (</>)) -import Data.Char (toLower, isAscii, isAlphaNum) -import Codec.Compression.GZip as Gzip -import qualified Data.ByteString.Lazy as L -import Control.Monad.Trans (MonadIO(..)) -import Text.Pandoc.Shared (renderTags', err, warn, trim) -import Text.Pandoc.MediaBag (MediaBag) -import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.UTF8 (toString) -import Text.Pandoc.Options (WriterOptions(..)) -import Data.List (isPrefixOf) -import Control.Applicative ((<|>)) -import Text.Parsec (runParserT, ParsecT) -import qualified Text.Parsec as P -import Control.Monad.Trans (lift) -import Text.Pandoc.Class (fetchItem, runIO, setMediaBag) - -isOk :: Char -> Bool -isOk c = isAscii c && isAlphaNum c - -makeDataURI :: String -> ByteString -> String -makeDataURI mime raw = - if textual - then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) - else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) - where textual = "text/" `Data.List.isPrefixOf` mime - mime' = if textual && ';' `notElem` mime - then mime ++ ";charset=utf-8" - else mime -- mime type already has charset - -convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) -convertTag media sourceURL t@(TagOpen tagname as) - | tagname `elem` - ["img", "embed", "video", "input", "audio", "source", "track"] = do - as' <- mapM processAttribute as - return $ TagOpen tagname as' - where processAttribute (x,y) = - if x == "src" || x == "href" || x == "poster" - then do - enc <- getDataURI media sourceURL (fromAttrib "type" t) y - return (x, enc) - else return (x,y) -convertTag media sourceURL t@(TagOpen "script" as) = - case fromAttrib "src" t of - [] -> return t - src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src - return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag media sourceURL t@(TagOpen "link" as) = - case fromAttrib "href" t of - [] -> return t - src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src - return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ _ t = return t - -cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString - -> IO ByteString -cssURLs media sourceURL d orig = do - res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig - case res of - Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig - Right bs -> return bs - -parseCSSUrls :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -parseCSSUrls media sourceURL d = B.concat <$> P.many - (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) - --- Note: some whitespace in CSS is significant, so we can't collapse it! -pCSSWhite :: ParsecT ByteString () IO ByteString -pCSSWhite = B.singleton <$> P.space <* P.spaces - -pCSSComment :: ParsecT ByteString () IO ByteString -pCSSComment = P.try $ do - P.string "/*" - P.manyTill P.anyChar (P.try (P.string "*/")) - return B.empty - -pCSSOther :: ParsecT ByteString () IO ByteString -pCSSOther = do - (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|> - (B.singleton <$> P.char 'u') <|> - (B.singleton <$> P.char '/') - -pCSSUrl :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -pCSSUrl media sourceURL d = P.try $ do - P.string "url(" - P.spaces - quote <- P.option Nothing (Just <$> P.oneOf "\"'") - url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) - P.spaces - P.char ')' - let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ - maybe "" (:[]) quote ++ ")") - case trim url of - '#':_ -> return fallback - 'd':'a':'t':'a':':':_ -> return fallback - u -> do let url' = if isURI u then u else d </> u - enc <- lift $ getDataURI media sourceURL "" url' - return (B.pack $ "url(" ++ enc ++ ")") - - -getDataURI :: MediaBag -> Maybe String -> MimeType -> String - -> IO String -getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri -getDataURI media sourceURL mimetype src = do - let ext = map toLower $ takeExtension src - fetchResult <- runIO $ do setMediaBag media - fetchItem sourceURL src - (raw, respMime) <- case fetchResult of - Left msg -> err 67 $ "Could not fetch " ++ src ++ - "\n" ++ show msg - Right x -> return x - let raw' = if ext == ".gz" - then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks - $ [raw] - else raw - let mime = case (mimetype, respMime) of - ("",Nothing) -> error - $ "Could not determine mime type for `" ++ src ++ "'" - (x, Nothing) -> x - (_, Just x ) -> x - let cssSourceURL = case parseURI src of - Just u - | uriScheme u `elem` ["http:","https:"] -> - Just $ show u{ uriPath = "", - uriQuery = "", - uriFragment = "" } - _ -> Nothing - result <- if mime == "text/css" - then cssURLs media cssSourceURL (takeDirectory src) raw' - else return raw' - return $ makeDataURI mime result - --- | Convert HTML into self-contained HTML, incorporating images, --- scripts, and CSS using data: URIs. -makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String -makeSelfContained opts mediabag inp = liftIO $ do - let tags = parseTags inp - out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags - return $ renderTags' out' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs deleted file mode 100644 index 268a5052e..000000000 --- a/src/Text/Pandoc/Shared.hs +++ /dev/null @@ -1,883 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, PatternGuards, - ViewPatterns #-} -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Utility functions and definitions used by the various Pandoc modules. --} -module Text.Pandoc.Shared ( - -- * List processing - splitBy, - splitByIndices, - splitStringByIndices, - substitute, - ordNub, - -- * Text processing - backslashEscapes, - escapeStringUsing, - stripTrailingNewlines, - trim, - triml, - trimr, - stripFirstAndLast, - camelCaseToHyphenated, - toRomanNumeral, - escapeURI, - tabFilter, - -- * Date/time - normalizeDate, - -- * Pandoc block and inline list processing - orderedListMarkers, - normalizeSpaces, - extractSpaces, - removeFormatting, - deNote, - stringify, - capitalize, - compactify, - compactifyDL, - linesToPara, - Element (..), - hierarchicalize, - uniqueIdent, - inlineListToIdentifier, - isHeaderBlock, - headerShift, - isTightList, - addMetaField, - makeMeta, - -- * TagSoup HTML handling - renderTags', - -- * File handling - inDirectory, - getDefaultReferenceDocx, - getDefaultReferenceODT, - readDataFile, - readDataFileUTF8, - openURL, - collapseFilePath, - filteredFilesFromArchive, - -- * Error handling - err, - warn, - mapLeft, - -- * for squashing blocks - blocksToInlines, - -- * Safe read - safeRead, - -- * Temp directory - withTempDir, - -- * Version - pandocVersion - ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) -import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.UTF8 as UTF8 -import System.Exit (exitWith, ExitCode(..)) -import Data.Char ( toLower, isLower, isUpper, isAlpha, - isLetter, isDigit, isSpace ) -import Data.List ( find, stripPrefix, intercalate ) -import Data.Maybe (mapMaybe) -import Data.Version ( showVersion ) -import qualified Data.Map as M -import Network.URI ( escapeURIString, unEscapeString ) -import qualified Data.Set as Set -import System.Directory -import System.FilePath (splitDirectories, isPathSeparator) -import qualified System.FilePath.Posix as Posix -import Text.Pandoc.MIME (MimeType) -import System.FilePath ( (</>) ) -import Data.Generics (Typeable, Data) -import qualified Control.Monad.State as S -import Control.Monad.Trans (MonadIO (..)) -import qualified Control.Exception as E -import Control.Monad (msum, unless, MonadPlus(..)) -import Text.Pandoc.Pretty (charWidth) -import Text.Pandoc.Compat.Time -import Data.Time.Clock.POSIX -import System.IO (stderr) -import System.IO.Temp -import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), - renderOptions) -import Data.Monoid ((<>)) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as B8 -import Data.ByteString.Base64 (decodeLenient) -import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Text as T (toUpper, pack, unpack) -import Data.ByteString.Lazy (toChunks, fromChunks) -import qualified Data.ByteString.Lazy as BL -import Paths_pandoc (version) - -import Codec.Archive.Zip - -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#else -import Paths_pandoc (getDataFileName) -#endif -#ifdef HTTP_CLIENT -import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host,requestHeaders)) -import Network.HTTP.Client (parseRequest) -import Network.HTTP.Client (newManager) -import Network.HTTP.Client.Internal (addProxy) -import Network.HTTP.Client.TLS (tlsManagerSettings) -import System.Environment (getEnv) -import Network.HTTP.Types.Header ( hContentType, hUserAgent) -import Network (withSocketsDo) -#else -import Network.URI (parseURI) -import Network.HTTP (findHeader, rspBody, - RequestMethod(..), HeaderName(..), mkRequest) -import Network.Browser (browse, setAllowRedirects, setOutHandler, request) -#endif - --- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = showVersion version - --- --- List processing --- - --- | Split list by groups of one or more sep. -splitBy :: (a -> Bool) -> [a] -> [[a]] -splitBy _ [] = [] -splitBy isSep lst = - let (first, rest) = break isSep lst - rest' = dropWhile isSep rest - in first:(splitBy isSep rest') - -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest) - where (first, rest) = splitAt x lst - --- | Split string into chunks divided at specified indices. -splitStringByIndices :: [Int] -> [Char] -> [[Char]] -splitStringByIndices [] lst = [lst] -splitStringByIndices (x:xs) lst = - let (first, rest) = splitAt' x lst in - first : (splitStringByIndices (map (\y -> y - x) xs) rest) - -splitAt' :: Int -> [Char] -> ([Char],[Char]) -splitAt' _ [] = ([],[]) -splitAt' n xs | n <= 0 = ([],xs) -splitAt' n (x:xs) = (x:ys,zs) - where (ys,zs) = splitAt' (n - charWidth x) xs - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ xs = xs -substitute target replacement lst@(x:xs) = - case stripPrefix target lst of - Just lst' -> replacement ++ substitute target replacement lst' - Nothing -> x : substitute target replacement xs - -ordNub :: (Ord a) => [a] -> [a] -ordNub l = go Set.empty l - where - go _ [] = [] - go s (x:xs) = if x `Set.member` s then go s xs - else x : go (Set.insert x s) xs - --- --- Text processing --- - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse - --- | Remove leading and trailing space (including newlines) from string. -trim :: String -> String -trim = triml . trimr - --- | Remove leading space (including newlines) from string. -triml :: String -> String -triml = dropWhile (`elem` " \r\n\t") - --- | Remove trailing space (including newlines) from string. -trimr :: String -> String -trimr = reverse . triml . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x - | x >= 4000 || x < 0 = "?" - | x >= 1000 = "M" ++ toRomanNumeral (x - 1000) - | x >= 900 = "CM" ++ toRomanNumeral (x - 900) - | x >= 500 = "D" ++ toRomanNumeral (x - 500) - | x >= 400 = "CD" ++ toRomanNumeral (x - 400) - | x >= 100 = "C" ++ toRomanNumeral (x - 100) - | x >= 90 = "XC" ++ toRomanNumeral (x - 90) - | x >= 50 = "L" ++ toRomanNumeral (x - 50) - | x >= 40 = "XL" ++ toRomanNumeral (x - 40) - | x >= 10 = "X" ++ toRomanNumeral (x - 10) - | x == 9 = "IX" - | x >= 5 = "V" ++ toRomanNumeral (x - 5) - | x == 4 = "IV" - | x >= 1 = "I" ++ toRomanNumeral (x - 1) - | otherwise = "" - --- | Escape whitespace and some punctuation characters in URI. -escapeURI :: String -> String -escapeURI = escapeURIString (not . needsEscaping) - where needsEscaping c = isSpace c || c `elem` - ['<','>','|','"','{','}','[',']','^', '`'] - - --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. -tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop - --- --- Date/time --- - --- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We --- limit years to the range 1601-9999 (ISO 8601 accepts greater than --- or equal to 1583, but MS Word only accepts dates starting 1601). -normalizeDate :: String -> Maybe String -normalizeDate s = fmap (formatTime defaultTimeLocale "%F") - (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) - where rejectBadYear day = case toGregorian day of - (y, _, _) | y >= 1601 && y <= 9999 -> Just day - _ -> Nothing - parsetimeWith = -#if MIN_VERSION_time(1,5,0) - parseTimeM True defaultTimeLocale -#else - parseTime defaultTimeLocale -#endif - formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", - "%Y%m%d", "%Y%m", "%Y"] - --- --- Pandoc block and inline list processing --- - --- | Generate infinite lazy list of markers for an ordered list, --- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Example -> map show [start..] - Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] - LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] - UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] - inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" - in map inDelim nums - --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of - [] -> [] - (x:xs) -> Space : x : cleanup xs - cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest - -isSpaceOrEmpty :: Inline -> Bool -isSpaceOrEmpty Space = True -isSpaceOrEmpty (Str "") = True -isSpaceOrEmpty _ = False - --- | Extract the leading and trailing spaces from inside an inline element --- and place them outside the element. SoftBreaks count as Spaces for --- these purposes. -extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines -extractSpaces f is = - let contents = B.unMany is - left = case viewl contents of - (Space :< _) -> B.space - (SoftBreak :< _) -> B.softbreak - _ -> mempty - right = case viewr contents of - (_ :> Space) -> B.space - (_ :> SoftBreak) -> B.softbreak - _ -> mempty in - (left <> f (B.trimInlines . B.Many $ contents) <> right) - --- | Extract inlines, removing formatting. -removeFormatting :: Walkable Inline a => a -> [Inline] -removeFormatting = query go . walk deNote - where go :: Inline -> [Inline] - go (Str xs) = [Str xs] - go Space = [Space] - go SoftBreak = [SoftBreak] - go (Code _ x) = [Str x] - go (Math _ x) = [Str x] - go LineBreak = [Space] - go _ = [] - -deNote :: Inline -> Inline -deNote (Note _) = Str "" -deNote x = x - --- | Convert pandoc structure to a string with formatting removed. --- Footnotes are skipped (since we don't want their contents in link --- labels). -stringify :: Walkable Inline a => a -> String -stringify = query go . walk deNote - where go :: Inline -> [Char] - go Space = " " - go SoftBreak = " " - go (Str x) = x - go (Code _ x) = x - go (Math _ x) = x - go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 - go LineBreak = " " - go _ = "" - --- | Bring all regular text in a pandoc structure to uppercase. --- --- This function correctly handles cases where a lowercase character doesn't --- match to a single uppercase character – e.g. “Straße” would be converted --- to “STRASSE”, not “STRAßE”. -capitalize :: Walkable Inline a => a -> a -capitalize = walk go - where go :: Inline -> Inline - go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) - go x = x - --- | Change final list item from @Para@ to @Plain@ if the list contains --- no other @Para@ blocks. Like compactify, but operates on @Blocks@ rather --- than @[Block]@. -compactify :: [Blocks] -- ^ List of list items (each a list of blocks) - -> [Blocks] -compactify [] = [] -compactify items = - let (others, final) = (init items, last items) - in case reverse (B.toList final) of - (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of - -- if this is only Para, change to Plain - [_] -> others ++ [B.fromList (reverse $ Plain a : xs)] - _ -> items - _ -> items - --- | Like @compactify@, but acts on items of definition lists. -compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] -compactifyDL items = - let defs = concatMap snd items - in case reverse (concatMap B.toList defs) of - (Para x:xs) - | not (any isPara xs) -> - let (t,ds) = last items - lastDef = B.toList $ last ds - ds' = init ds ++ - if null lastDef - then [B.fromList lastDef] - else [B.fromList $ init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - | otherwise -> items - _ -> items - --- | Combine a list of lines by adding hard linebreaks. -combineLines :: [[Inline]] -> [Inline] -combineLines = intercalate [LineBreak] - --- | Convert a list of lines into a paragraph with hard line breaks. This is --- useful e.g. for rudimentary support of LineBlock elements in writers. -linesToPara :: [[Inline]] -> Block -linesToPara = Para . combineLines - -isPara :: Block -> Bool -isPara (Para _) = True -isPara _ = False - --- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block - | Sec Int [Int] Attr [Inline] [Element] - -- lvl num attributes label contents - deriving (Eq, Read, Show, Typeable, Data) - -instance Walkable Inline Element where - walk f (Blk x) = Blk (walk f x) - walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) - walkM f (Blk x) = Blk `fmap` walkM f x - walkM f (Sec lev nums attr ils elts) = do - ils' <- walkM f ils - elts' <- walkM f elts - return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x - query f (Sec _ _ _ ils elts) = query f ils <> query f elts - -instance Walkable Block Element where - walk f (Blk x) = Blk (walk f x) - walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) - walkM f (Blk x) = Blk `fmap` walkM f x - walkM f (Sec lev nums attr ils elts) = do - ils' <- walkM f ils - elts' <- walkM f elts - return $ Sec lev nums attr ils' elts' - query f (Blk x) = query f x - query f (Sec _ _ _ ils elts) = query f ils <> query f elts - - --- | Convert Pandoc inline list to plain text identifier. HTML --- identifiers must start with a letter, and may contain only --- letters, digits, and the characters _-. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = - dropWhile (not . isAlpha) . intercalate "-" . words . - map (nbspToSp . toLower) . - filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . - stringify - where nbspToSp '\160' = ' ' - nbspToSp x = x - --- | Convert list of Pandoc blocks into (hierarchical) list of Elements -hierarchicalize :: [Block] -> [Element] -hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) [] - -hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element] -hierarchicalizeWithIds [] = return [] -hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do - lastnum <- S.get - let lastnum' = take level lastnum - let newnum = case length lastnum' of - x | "unnumbered" `elem` classes -> [] - | x >= level -> init lastnum' ++ [last lastnum' + 1] - | otherwise -> lastnum ++ - replicate (level - length lastnum - 1) 0 ++ [1] - unless (null newnum) $ S.put newnum - let (sectionContents, rest) = break (headerLtEq level) xs - sectionContents' <- hierarchicalizeWithIds sectionContents - rest' <- hierarchicalizeWithIds rest - return $ Sec level newnum attr title' sectionContents' : rest' -hierarchicalizeWithIds ((Div ("",["references"],[]) - (Header level (ident,classes,kvs) title' : xs)):ys) = - hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs) - title') : (xs ++ ys)) -hierarchicalizeWithIds (x:rest) = do - rest' <- hierarchicalizeWithIds rest - return $ (Blk x) : rest' - -headerLtEq :: Int -> Block -> Bool -headerLtEq level (Header l _ _) = l <= level -headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level -headerLtEq _ _ = False - --- | Generate a unique identifier from a list of inlines. --- Second argument is a list of already used identifiers. -uniqueIdent :: [Inline] -> Set.Set String -> String -uniqueIdent title' usedIdents - = let baseIdent = case inlineListToIdentifier title' of - "" -> "section" - x -> x - numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `Set.member` usedIdents - then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of - Just x -> numIdent x - Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent - --- | True if block is a Header block. -isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _ _) = True -isHeaderBlock _ = False - --- | Shift header levels up or down. -headerShift :: Int -> Pandoc -> Pandoc -headerShift n = walk shift - where shift :: Block -> Block - shift (Header level attr inner) = Header (level + n) attr inner - shift x = x - --- | Detect if a list is tight. -isTightList :: [[Block]] -> Bool -isTightList = all firstIsPlain - where firstIsPlain (Plain _ : _) = True - firstIsPlain _ = False - --- | Set a field of a 'Meta' object. If the field already has a value, --- convert it into a list with the new value appended to the old value(s). -addMetaField :: ToMetaValue a - => String - -> a - -> Meta - -> Meta -addMetaField key val (Meta meta) = - Meta $ M.insertWith combine key (toMetaValue val) meta - where combine newval (MetaList xs) = MetaList (xs ++ tolist newval) - combine newval x = MetaList [x, newval] - tolist (MetaList ys) = ys - tolist y = [y] - --- | Create 'Meta' from old-style title, authors, date. This is --- provided to ease the transition from the old API. -makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta -makeMeta title authors date = - addMetaField "title" (B.fromList title) - $ addMetaField "author" (map B.fromList authors) - $ addMetaField "date" (B.fromList date) - $ nullMeta - --- --- TagSoup HTML handling --- - --- | Render HTML tags. -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = matchTags ["hr", "br", "img", - "meta", "link"] - , optRawTag = matchTags ["script", "style"] } - where matchTags = \tags -> flip elem tags . map toLower - --- --- File handling --- - --- | Perform an IO action in a directory, returning to starting directory. -inDirectory :: FilePath -> IO a -> IO a -inDirectory path action = E.bracket - getCurrentDirectory - setCurrentDirectory - (const $ setCurrentDirectory path >> action) - -getDefaultReferenceDocx :: Maybe FilePath -> IO Archive -getDefaultReferenceDocx datadir = do - let paths = ["[Content_Types].xml", - "_rels/.rels", - "docProps/app.xml", - "docProps/core.xml", - "word/document.xml", - "word/fontTable.xml", - "word/footnotes.xml", - "word/numbering.xml", - "word/settings.xml", - "word/webSettings.xml", - "word/styles.xml", - "word/_rels/document.xml.rels", - "word/_rels/footnotes.xml.rels", - "word/theme/theme1.xml"] - let toLazy = fromChunks . (:[]) - let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> - getCurrentTime - contents <- toLazy <$> readDataFile datadir - ("docx/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.docx") - if exists - then return (Just (d </> "reference.docx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - -getDefaultReferenceODT :: Maybe FilePath -> IO Archive -getDefaultReferenceODT datadir = do - let paths = ["mimetype", - "manifest.rdf", - "styles.xml", - "content.xml", - "meta.xml", - "settings.xml", - "Configurations2/accelerator/current.xml", - "Thumbnails/thumbnail.png", - "META-INF/manifest.xml"] - let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime - contents <- (fromChunks . (:[])) `fmap` - readDataFile datadir ("odt/" ++ path) - return $ toEntry path epochtime contents - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then return (Just (d </> "reference.odt")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> BL.readFile arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - - -readDefaultDataFile :: FilePath -> IO BS.ByteString -readDefaultDataFile "reference.docx" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing -readDefaultDataFile "reference.odt" = - (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing -readDefaultDataFile fname = -#ifdef EMBED_DATA_FILES - case lookup (makeCanonical fname) dataFiles of - Nothing -> err 97 $ "Could not find data file " ++ fname - Just contents -> return contents - where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - transformPathParts = reverse . foldl go [] - go as "." = as - go (_:as) ".." = as - go as x = x : as -#else - getDataFileName fname' >>= checkExistence >>= BS.readFile - where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname - -checkExistence :: FilePath -> IO FilePath -checkExistence fn = do - exists <- doesFileExist fn - if exists - then return fn - else err 97 ("Could not find data file " ++ fn) -#endif - --- | Read file from specified user data directory or, if not found there, from --- Cabal data directory. -readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString -readDataFile Nothing fname = readDefaultDataFile fname -readDataFile (Just userDir) fname = do - exists <- doesFileExist (userDir </> fname) - if exists - then BS.readFile (userDir </> fname) - else readDefaultDataFile fname - --- | Same as 'readDataFile' but returns a String instead of a ByteString. -readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String -readDataFileUTF8 userDir fname = - UTF8.toString `fmap` readDataFile userDir fname - --- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) -openURL u - | Just u'' <- stripPrefix "data:" u = - let mime = takeWhile (/=',') u'' - contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return $ Right (decodeLenient contents, Just mime) -#ifdef HTTP_CLIENT - | otherwise = withSocketsDo $ E.try $ do - let parseReq = parseRequest - (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" - (useragent :: Either E.SomeException String) <- E.try $ getEnv "USER_AGENT" - req <- parseReq u - req' <- case proxy of - Left _ -> return req - Right pr -> (parseReq pr >>= \r -> - return $ addProxy (host r) (port r) req) - `mplus` return req - req'' <- case useragent of - Left _ -> return req' - Right ua -> do - let headers = requestHeaders req' - let useragentheader = (hUserAgent, B8.pack ua) - let headers' = useragentheader:headers - return $ req' {requestHeaders = headers'} - resp <- newManager tlsManagerSettings >>= httpLbs req'' - return (BS.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) -#else - | otherwise = E.try $ getBodyAndMimeType `fmap` browse - (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." - setOutHandler $ const (return ()) - setAllowRedirects True - request (getRequest' u')) - where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r) - getRequest' uriString = case parseURI uriString of - Nothing -> error ("Not a valid URL: " ++ - uriString) - Just v -> mkRequest GET v - u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI -#endif - --- --- Error reporting --- - -err :: MonadIO m => Int -> String -> m a -err exitCode msg = liftIO $ do - UTF8.hPutStrLn stderr msg - exitWith $ ExitFailure exitCode - return undefined - -warn :: MonadIO m => String -> m () -warn msg = liftIO $ do - UTF8.hPutStrLn stderr $ "[warning] " ++ msg - -mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left x) = Left (f x) -mapLeft _ (Right x) = Right x - --- | Remove intermediate "." and ".." directories from a path. --- --- > collapseFilePath "./foo" == "foo" --- > collapseFilePath "/bar/../baz" == "/baz" --- > collapseFilePath "/../baz" == "/../baz" --- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar" --- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar" --- > collapseFilePath "parent/foo/.." == "parent" --- > collapseFilePath "/parent/foo/../../bar" == "/bar" -collapseFilePath :: FilePath -> FilePath -collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories - where - go rs "." = rs - go r@(p:rs) ".." = case p of - ".." -> ("..":r) - (checkPathSeperator -> Just True) -> ("..":r) - _ -> rs - go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]] - go rs x = x:rs - isSingleton [] = Nothing - isSingleton [x] = Just x - isSingleton _ = Nothing - checkPathSeperator = fmap isPathSeparator . isSingleton - --- --- File selection from the archive --- -filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)] -filteredFilesFromArchive zf f = - mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf)) - where - fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) - fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) - ---- ---- Squash blocks into inlines ---- - -blockToInlines :: Block -> [Inline] -blockToInlines (Plain ils) = ils -blockToInlines (Para ils) = ils -blockToInlines (LineBlock lns) = combineLines lns -blockToInlines (CodeBlock attr str) = [Code attr str] -blockToInlines (RawBlock fmt str) = [RawInline fmt str] -blockToInlines (BlockQuote blks) = blocksToInlines blks -blockToInlines (OrderedList _ blkslst) = - concatMap blocksToInlines blkslst -blockToInlines (BulletList blkslst) = - concatMap blocksToInlines blkslst -blockToInlines (DefinitionList pairslst) = - concatMap f pairslst - where - f (ils, blkslst) = ils ++ - [Str ":", Space] ++ - (concatMap blocksToInlines blkslst) -blockToInlines (Header _ _ ils) = ils -blockToInlines (HorizontalRule) = [] -blockToInlines (Table _ _ _ headers rows) = - intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl - where - tbl = headers : rows -blockToInlines (Div _ blks) = blocksToInlines blks -blockToInlines Null = [] - -blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline] -blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks - -blocksToInlines :: [Block] -> [Inline] -blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space] - - --- --- Safe read --- - -safeRead :: (MonadPlus m, Read a) => String -> m a -safeRead s = case reads s of - (d,x):_ - | all isSpace x -> return d - _ -> mzero - --- --- Temp directory --- - -withTempDir :: String -> (FilePath -> IO a) -> IO a -withTempDir = -#ifdef _WINDOWS - withTempDirectory "." -#else - withSystemTempDirectory -#endif diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs deleted file mode 100644 index e19dba3e2..000000000 --- a/src/Text/Pandoc/Slides.hs +++ /dev/null @@ -1,63 +0,0 @@ -{- -Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Utility functions for splitting documents into slides for slide -show formats (dzslides, revealjs, s5, slidy, slideous, beamer). --} -module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where -import Text.Pandoc.Definition - --- | Find level of header that starts slides (defined as the least header --- level that occurs before a non-header/non-hrule in the blocks). -getSlideLevel :: [Block] -> Int -getSlideLevel = go 6 - where go least (Header n _ _ : x : xs) - | n < least && nonHOrHR x = go n xs - | otherwise = go least (x:xs) - go least (_ : xs) = go least xs - go least [] = least - nonHOrHR (Header{}) = False - nonHOrHR (HorizontalRule) = False - nonHOrHR _ = True - --- | Prepare a block list to be passed to hierarchicalize. -prepSlides :: Int -> [Block] -> [Block] -prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader - where splitHrule (HorizontalRule : Header n attr xs : ys) - | n == slideLevel = Header slideLevel attr xs : splitHrule ys - splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] : - splitHrule xs - splitHrule (x : xs) = x : splitHrule xs - splitHrule [] = [] - extractRefsHeader bs = - case reverse bs of - (Div ("",["references"],[]) (Header n attrs xs : ys) : zs) - -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys]) - _ -> bs - ensureStartWithH bs@(Header n _ _:_) - | n <= slideLevel = bs - ensureStartWithH bs = Header slideLevel nullAttr [Str "\0"] : bs diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs deleted file mode 100644 index 705ac54c9..000000000 --- a/src/Text/Pandoc/Templates.hs +++ /dev/null @@ -1,77 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, - OverloadedStrings, GeneralizedNewtypeDeriving #-} -{- -Copyright (C) 2009-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -A simple templating system with variable substitution and conditionals. - --} - -module Text.Pandoc.Templates ( renderTemplate - , renderTemplate' - , TemplateTarget - , varListToJSON - , compileTemplate - , Template - , getDefaultTemplate ) where - -import Text.DocTemplates (Template, TemplateTarget, compileTemplate, - renderTemplate, applyTemplate, - varListToJSON) -import Data.Aeson (ToJSON(..)) -import qualified Data.Text as T -import System.FilePath ((</>), (<.>)) -import qualified Control.Exception.Extensible as E (try, IOException) -import Text.Pandoc.Shared (readDataFileUTF8) - --- | Get default template for the specified writer. -getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first - -> String -- ^ Name of writer - -> IO (Either E.IOException String) -getDefaultTemplate user writer = do - let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions - case format of - "native" -> return $ Right "" - "json" -> return $ Right "" - "docx" -> return $ Right "" - "fb2" -> return $ Right "" - "odt" -> getDefaultTemplate user "opendocument" - "html" -> getDefaultTemplate user "html5" - "docbook" -> getDefaultTemplate user "docbook5" - "epub" -> getDefaultTemplate user "epub3" - "markdown_strict" -> getDefaultTemplate user "markdown" - "multimarkdown" -> getDefaultTemplate user "markdown" - "markdown_github" -> getDefaultTemplate user "markdown" - "markdown_mmd" -> getDefaultTemplate user "markdown" - "markdown_phpextra" -> getDefaultTemplate user "markdown" - _ -> let fname = "templates" </> "default" <.> format - in E.try $ readDataFileUTF8 user fname - --- | Like 'applyTemplate', but raising an error if compilation fails. -renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b -renderTemplate' template = either error id . applyTemplate (T.pack template) - diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs deleted file mode 100644 index 62a662029..000000000 --- a/src/Text/Pandoc/UTF8.hs +++ /dev/null @@ -1,121 +0,0 @@ -{- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. --} -module Text.Pandoc.UTF8 ( readFile - , writeFile - , getContents - , putStr - , putStrLn - , hPutStr - , hPutStrLn - , hGetContents - , toString - , fromString - , toStringLazy - , fromStringLazy - , encodePath - , decodeArg - ) - -where - -import System.IO hiding (readFile, writeFile, getContents, - putStr, putStrLn, hPutStr, hPutStrLn, hGetContents) -import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) -import qualified System.IO as IO -import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.Text.Encoding as T -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL -import qualified Data.Text.Lazy.Encoding as TL - -readFile :: FilePath -> IO String -readFile f = do - h <- openFile (encodePath f) ReadMode - hGetContents h - -writeFile :: FilePath -> String -> IO () -writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s - -getContents :: IO String -getContents = hGetContents stdin - -putStr :: String -> IO () -putStr s = hPutStr stdout s - -putStrLn :: String -> IO () -putStrLn s = hPutStrLn stdout s - -hPutStr :: Handle -> String -> IO () -hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s - -hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s - -hGetContents :: Handle -> IO String -hGetContents = fmap toString . B.hGetContents --- hGetContents h = hSetEncoding h utf8_bom --- >> hSetNewlineMode h universalNewlineMode --- >> IO.hGetContents h - --- | Drop BOM (byte order marker) if present at beginning of string. --- Note that Data.Text converts the BOM to code point FEFF, zero-width --- no-break space, so if the string begins with this we strip it off. -dropBOM :: String -> String -dropBOM ('\xFEFF':xs) = xs -dropBOM xs = xs - -filterCRs :: String -> String -filterCRs ('\r':'\n':xs) = '\n': filterCRs xs -filterCRs ('\r':xs) = '\n' : filterCRs xs -filterCRs (x:xs) = x : filterCRs xs -filterCRs [] = [] - --- | Convert UTF8-encoded ByteString to String, also --- removing '\r' characters. -toString :: B.ByteString -> String -toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8 - -fromString :: String -> B.ByteString -fromString = T.encodeUtf8 . T.pack - --- | Convert UTF8-encoded ByteString to String, also --- removing '\r' characters. -toStringLazy :: BL.ByteString -> String -toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8 - -fromStringLazy :: String -> BL.ByteString -fromStringLazy = TL.encodeUtf8 . TL.pack - -encodePath :: FilePath -> FilePath -encodePath = id - -decodeArg :: String -> String -decodeArg = id diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs deleted file mode 100644 index 8de102742..000000000 --- a/src/Text/Pandoc/UUID.hs +++ /dev/null @@ -1,78 +0,0 @@ -{- -Copyright (C) 2010-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -UUID generation using Version 4 (random method) described -in RFC4122. See http://tools.ietf.org/html/rfc4122 --} - -module Text.Pandoc.UUID ( UUID(..), getRandomUUID, getUUID ) where - -import Text.Printf ( printf ) -import System.Random ( RandomGen, randoms, getStdGen ) -import Data.Word -import Data.Bits ( setBit, clearBit ) - -data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 - Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 - -instance Show UUID where - show (UUID a b c d e f g h i j k l m n o p) = - "urn:uuid:" ++ - printf "%02x" a ++ - printf "%02x" b ++ - printf "%02x" c ++ - printf "%02x" d ++ - "-" ++ - printf "%02x" e ++ - printf "%02x" f ++ - "-" ++ - printf "%02x" g ++ - printf "%02x" h ++ - "-" ++ - printf "%02x" i ++ - printf "%02x" j ++ - "-" ++ - printf "%02x" k ++ - printf "%02x" l ++ - printf "%02x" m ++ - printf "%02x" n ++ - printf "%02x" o ++ - printf "%02x" p - -getUUID :: RandomGen g => g -> UUID -getUUID gen = - let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8] - -- set variant - i' = i `setBit` 7 `clearBit` 6 - -- set version (0100 for random) - g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 - in - UUID a b c d e f g' h i' j k l m n o p - -getRandomUUID :: IO UUID -getRandomUUID = getUUID <$> getStdGen - diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs deleted file mode 100644 index 356b29504..000000000 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ /dev/null @@ -1,470 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to asciidoc. - -Note that some information may be lost in conversion, due to -expressive limitations of asciidoc. Footnotes and table cells with -paragraphs (or other block items) are not possible in asciidoc. -If pandoc encounters one of these, it will insert a message indicating -that it has omitted the construct. - -AsciiDoc: <http://www.methods.co.nz/asciidoc/> --} -module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where -import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, space) -import Data.Maybe (fromMaybe) -import Data.List ( stripPrefix, intersperse, intercalate ) -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Control.Monad.State -import qualified Data.Map as M -import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) -import qualified Data.Text as T -import Data.Char (isSpace, isPunctuation) -import Text.Pandoc.Class (PandocMonad) - -data WriterState = WriterState { defListMarker :: String - , orderedListLevel :: Int - , bulletListLevel :: Int - , intraword :: Bool - } - --- | Convert Pandoc to AsciiDoc. -writeAsciiDoc :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeAsciiDoc opts document = return $ - evalState (pandocToAsciiDoc opts document) WriterState{ - defListMarker = "::" - , orderedListLevel = 1 - , bulletListLevel = 1 - , intraword = False - } - --- | Return asciidoc representation of document. -pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String -pandocToAsciiDoc opts (Pandoc meta blocks) = do - let titleblock = not $ null (docTitle meta) && null (docAuthors meta) && - null (docDate meta) - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToAsciiDoc opts) - (fmap (render colwidth) . inlineListToAsciiDoc opts) - meta - let addTitleLine (String t) = String $ - t <> "\n" <> T.replicate (T.length t) "=" - addTitleLine x = x - let metadata' = case fromJSON metadata of - Success m -> toJSON $ M.adjust addTitleLine - ("title" :: T.Text) m - _ -> metadata - body <- blockListToAsciiDoc opts blocks - let main = render colwidth body - let context = defField "body" main - $ defField "toc" - (writerTableOfContents opts && - writerTemplate opts /= Nothing) - $ defField "titleblock" titleblock - $ metadata' - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Escape special characters for AsciiDoc. -escapeString :: String -> String -escapeString = escapeStringUsing escs - where escs = backslashEscapes "{" - --- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char -olMarker = do (start, style', delim) <- anyOrderedListMarker - if delim == Period && - (style' == UpperAlpha || (style' == UpperRoman && - start `elem` [1, 5, 10, 50, 100, 500, 1000])) - then spaceChar >> spaceChar - else spaceChar - --- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True - --- | Convert Pandoc block element to asciidoc. -blockToAsciiDoc :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToAsciiDoc _ Null = return empty -blockToAsciiDoc opts (Plain inlines) = do - contents <- inlineListToAsciiDoc opts inlines - return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do - blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) -blockToAsciiDoc opts (Para inlines) = do - contents <- inlineListToAsciiDoc opts inlines - -- escape if para starts with ordered list marker - let esc = if beginsWithOrderedListMarker (render Nothing contents) - then text "\\" - else empty - return $ esc <> contents <> blankline -blockToAsciiDoc opts (LineBlock lns) = do - let docify line = if null line - then return blankline - else inlineListToAsciiDoc opts line - let joinWithLinefeeds = nowrap . mconcat . intersperse cr - contents <- joinWithLinefeeds <$> mapM docify lns - return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline -blockToAsciiDoc _ (RawBlock f s) - | f == "asciidoc" = return $ text s - | otherwise = return empty -blockToAsciiDoc _ HorizontalRule = - return $ blankline <> text "'''''" <> blankline -blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do - contents <- inlineListToAsciiDoc opts inlines - let len = offset contents - -- ident seem to be empty most of the time and asciidoc will generate them automatically - -- so lets make them not show up when null - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") - let setext = writerSetextHeaders opts - return $ - (if setext - then - identifier $$ contents $$ - (case level of - 1 -> text $ replicate len '-' - 2 -> text $ replicate len '~' - 3 -> text $ replicate len '^' - 4 -> text $ replicate len '+' - _ -> empty) <> blankline - else - identifier $$ text (replicate level '=') <> space <> contents <> blankline) -blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ (flush $ - if null classes - then "...." $$ text str $$ "...." - else attrs $$ "----" $$ text str $$ "----") - <> blankline - where attrs = "[" <> text (intercalate "," ("source" : classes)) <> "]" -blockToAsciiDoc opts (BlockQuote blocks) = do - contents <- blockListToAsciiDoc opts blocks - let isBlock (BlockQuote _) = True - isBlock _ = False - -- if there are nested block quotes, put in an open block - let contents' = if any isBlock blocks - then "--" $$ contents $$ "--" - else contents - let cols = offset contents' - let bar = text $ replicate cols '_' - return $ bar $$ chomp contents' $$ bar <> blankline -blockToAsciiDoc opts (Table caption aligns widths headers rows) = do - caption' <- inlineListToAsciiDoc opts caption - let caption'' = if null caption - then empty - else "." <> caption' <> cr - let isSimple = all (== 0) widths - let relativePercentWidths = if isSimple - then widths - else map (/ (sum widths)) widths - let widths'' :: [Integer] - widths'' = map (floor . (* 100)) relativePercentWidths - -- ensure that the widths sum to 100 - let widths' = case widths'' of - _ | isSimple -> widths'' - (w:ws) | sum (w:ws) < 100 - -> (100 - sum ws) : ws - ws -> ws - let totalwidth :: Integer - totalwidth = floor $ sum widths * 100 - let colspec al wi = (case al of - AlignLeft -> "<" - AlignCenter -> "^" - AlignRight -> ">" - AlignDefault -> "") ++ - if wi == 0 then "" else (show wi ++ "%") - let headerspec = if all null headers - then empty - else text "options=\"header\"," - let widthspec = if totalwidth == 0 - then empty - else text "width=" - <> doubleQuotes (text $ show totalwidth ++ "%") - <> text "," - let tablespec = text "[" - <> widthspec - <> text "cols=" - <> doubleQuotes (text $ intercalate "," - $ zipWith colspec aligns widths') - <> text "," - <> headerspec <> text "]" - let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] - return $ text "|" <> chomp d - makeCell [Para x] = makeCell [Plain x] - makeCell [] = return $ text "|" - makeCell bs = do d <- blockListToAsciiDoc opts bs - return $ text "a|" $$ d - let makeRow cells = hsep `fmap` mapM makeCell cells - rows' <- mapM makeRow rows - head' <- makeRow headers - let head'' = if all null headers then empty else head' - let colwidth = if writerWrapText opts == WrapAuto - then writerColumns opts - else 100000 - let maxwidth = maximum $ map offset (head':rows') - let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '=' - return $ - caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline -blockToAsciiDoc opts (BulletList items) = do - contents <- mapM (bulletListItemToAsciiDoc opts) items - return $ cat contents <> blankline -blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do - let sty' = case sty of - UpperRoman -> UpperAlpha - LowerRoman -> LowerAlpha - x -> x - let markers = orderedListMarkers (1, sty', Period) -- start num not used - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $ - zip markers' items - return $ cat contents <> blankline -blockToAsciiDoc opts (DefinitionList items) = do - contents <- mapM (definitionListItemToAsciiDoc opts) items - return $ cat contents <> blankline -blockToAsciiDoc opts (Div (ident,_,_) bs) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") - contents <- blockListToAsciiDoc opts bs - return $ identifier $$ contents - --- | Convert bullet list item (list of blocks) to asciidoc. -bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToAsciiDoc opts blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc - addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b - addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b - return $ d <> cr <> chomp x - addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b - return $ d <> cr <> chomp x - addBlock d b = do x <- blockToAsciiDoc opts b - return $ d <> cr <> text "+" <> cr <> chomp x - lev <- bulletListLevel `fmap` get - modify $ \s -> s{ bulletListLevel = lev + 1 } - contents <- foldM addBlock empty blocks - modify $ \s -> s{ bulletListLevel = lev } - let marker = text (replicate lev '*') - return $ marker <> text " " <> contents <> cr - --- | Convert ordered list item (a list of blocks) to asciidoc. -orderedListItemToAsciiDoc :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToAsciiDoc opts marker blocks = do - let addBlock :: Doc -> Block -> State WriterState Doc - addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b - addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b - return $ d <> cr <> chomp x - addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b - return $ d <> cr <> chomp x - addBlock d b = do x <- blockToAsciiDoc opts b - return $ d <> cr <> text "+" <> cr <> chomp x - lev <- orderedListLevel `fmap` get - modify $ \s -> s{ orderedListLevel = lev + 1 } - contents <- foldM addBlock empty blocks - modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> text " " <> contents <> cr - --- | Convert definition list item (label, list of blocks) to asciidoc. -definitionListItemToAsciiDoc :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState Doc -definitionListItemToAsciiDoc opts (label, defs) = do - labelText <- inlineListToAsciiDoc opts label - marker <- defListMarker `fmap` get - if marker == "::" - then modify (\st -> st{ defListMarker = ";;"}) - else modify (\st -> st{ defListMarker = "::"}) - let divider = cr <> text "+" <> cr - let defsToAsciiDoc :: [Block] -> State WriterState Doc - defsToAsciiDoc ds = (vcat . intersperse divider . map chomp) - `fmap` mapM (blockToAsciiDoc opts) ds - defs' <- mapM defsToAsciiDoc defs - modify (\st -> st{ defListMarker = marker }) - let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs' - return $ labelText <> text marker <> cr <> contents <> cr - --- | Convert list of Pandoc block elements to asciidoc. -blockListToAsciiDoc :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks - -data SpacyLocation = End | Start - --- | Convert list of Pandoc inline elements to asciidoc. -inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToAsciiDoc opts lst = do - oldIntraword <- gets intraword - setIntraword False - result <- go lst - setIntraword oldIntraword - return result - where go [] = return empty - go (y:x:xs) - | not (isSpacy End y) = do - y' <- if isSpacy Start x - then inlineToAsciiDoc opts y - else withIntraword $ inlineToAsciiDoc opts y - x' <- withIntraword $ inlineToAsciiDoc opts x - xs' <- go xs - return (y' <> x' <> xs') - | not (isSpacy Start x) = do - y' <- withIntraword $ inlineToAsciiDoc opts y - xs' <- go (x:xs) - return (y' <> xs') - go (x:xs) = do - x' <- inlineToAsciiDoc opts x - xs' <- go xs - return (x' <> xs') - isSpacy :: SpacyLocation -> Inline -> Bool - isSpacy _ Space = True - isSpacy _ LineBreak = True - isSpacy _ SoftBreak = True - -- Note that \W characters count as spacy in AsciiDoc - -- for purposes of determining interword: - isSpacy End (Str xs) = case reverse xs of - c:_ -> isPunctuation c || isSpace c - _ -> False - isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c - isSpacy _ _ = False - -setIntraword :: Bool -> State WriterState () -setIntraword b = modify $ \st -> st{ intraword = b } - -withIntraword :: State WriterState a -> State WriterState a -withIntraword p = setIntraword True *> p <* setIntraword False - --- | Convert Pandoc inline element to asciidoc. -inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc -inlineToAsciiDoc opts (Emph lst) = do - contents <- inlineListToAsciiDoc opts lst - isIntraword <- gets intraword - let marker = if isIntraword then "__" else "_" - return $ marker <> contents <> marker -inlineToAsciiDoc opts (Strong lst) = do - contents <- inlineListToAsciiDoc opts lst - isIntraword <- gets intraword - let marker = if isIntraword then "**" else "*" - return $ marker <> contents <> marker -inlineToAsciiDoc opts (Strikeout lst) = do - contents <- inlineListToAsciiDoc opts lst - return $ "[line-through]*" <> contents <> "*" -inlineToAsciiDoc opts (Superscript lst) = do - contents <- inlineListToAsciiDoc opts lst - return $ "^" <> contents <> "^" -inlineToAsciiDoc opts (Subscript lst) = do - contents <- inlineListToAsciiDoc opts lst - return $ "~" <> contents <> "~" -inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst -inlineToAsciiDoc opts (Quoted SingleQuote lst) = - inlineListToAsciiDoc opts (Str "`" : lst ++ [Str "'"]) -inlineToAsciiDoc opts (Quoted DoubleQuote lst) = - inlineListToAsciiDoc opts (Str "``" : lst ++ [Str "''"]) -inlineToAsciiDoc _ (Code _ str) = return $ - text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`" -inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str -inlineToAsciiDoc _ (Math InlineMath str) = - return $ "latexmath:[$" <> text str <> "$]" -inlineToAsciiDoc _ (Math DisplayMath str) = - return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline f s) - | f == "asciidoc" = return $ text s - | otherwise = return empty -inlineToAsciiDoc _ LineBreak = return $ " +" <> cr -inlineToAsciiDoc _ Space = return space -inlineToAsciiDoc opts SoftBreak = - case writerWrapText opts of - WrapAuto -> return space - WrapPreserve -> return cr - WrapNone -> return space -inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst -inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do --- relative: link:downloads/foo.zip[download foo.zip] --- abs: http://google.cod[Google] --- or my@email.com[email john] - linktext <- inlineListToAsciiDoc opts txt - let isRelative = ':' `notElem` src - let prefix = if isRelative - then text "link:" - else empty - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - let useAuto = case txt of - [Str s] | escapeURI s == srcSuffix -> True - _ -> False - return $ if useAuto - then text srcSuffix - else prefix <> text src <> "[" <> linktext <> "]" -inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do --- image:images/logo.png[Company logo, title="blah"] - let txt = if (null alternate) || (alternate == [Str ""]) - then [Str "image"] - else alternate - linktext <- inlineListToAsciiDoc opts txt - let linktitle = if null tit - then empty - else ",title=\"" <> text tit <> "\"" - showDim dir = case (dimension dir attr) of - Just (Percent a) -> - ["scaledwidth=" <> text (show (Percent a))] - Just dim -> - [text (show dir) <> "=" <> text (showInPixel opts dim)] - Nothing -> - [] - dimList = showDim Width ++ showDim Height - dims = if null dimList - then empty - else "," <> cat (intersperse "," dimList) - return $ "image:" <> text src <> "[" <> linktext <> linktitle <> dims <> "]" -inlineToAsciiDoc opts (Note [Para inlines]) = - inlineToAsciiDoc opts (Note [Plain inlines]) -inlineToAsciiDoc opts (Note [Plain inlines]) = do - contents <- inlineListToAsciiDoc opts inlines - return $ text "footnote:[" <> contents <> "]" --- asciidoc can't handle blank lines in notes -inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" -inlineToAsciiDoc opts (Span (ident,_,_) ils) = do - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") - contents <- inlineListToAsciiDoc opts ils - return $ identifier <> contents diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs deleted file mode 100644 index b83f6785d..000000000 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ /dev/null @@ -1,190 +0,0 @@ -{- -Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.CommonMark - Copyright : Copyright (C) 2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to CommonMark. - -CommonMark: <http://commonmark.org> --} -module Text.Pandoc.Writers.CommonMark (writeCommonMark) where - -import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Definition -import Text.Pandoc.Shared (isTightList, linesToPara) -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import CMark -import qualified Data.Text as T -import Control.Monad.State (runState, State, modify, get) -import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Class (PandocMonad) -import Data.Foldable (foldrM) - --- | Convert Pandoc to CommonMark. -writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeCommonMark opts (Pandoc meta blocks) = do - let (blocks', notes) = runState (walkM processNotes blocks) [] - notes' = if null notes - then [] - else [OrderedList (1, Decimal, Period) $ reverse notes] - main <- blocksToCommonMark opts (blocks' ++ notes') - metadata <- metaToJSON opts - (blocksToCommonMark opts) - (inlinesToCommonMark opts) - meta - let context = defField "body" main $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - -processNotes :: Inline -> State [[Block]] Inline -processNotes (Note bs) = do - modify (bs :) - notes <- get - return $ Str $ "[" ++ show (length notes) ++ "]" -processNotes x = return x - -node :: NodeType -> [Node] -> Node -node = Node Nothing - -blocksToCommonMark :: PandocMonad m => WriterOptions -> [Block] -> m String -blocksToCommonMark opts bs = do - let cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - nodes <- blocksToNodes bs - return $ - T.unpack $ - nodeToCommonmark cmarkOpts colwidth $ - node DOCUMENT nodes - -inlinesToCommonMark :: PandocMonad m => WriterOptions -> [Inline] -> m String -inlinesToCommonMark opts ils = return $ - T.unpack $ nodeToCommonmark cmarkOpts colwidth - $ node PARAGRAPH (inlinesToNodes ils) - where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - -blocksToNodes :: PandocMonad m => [Block] -> m [Node] -blocksToNodes = foldrM blockToNodes [] - -blockToNodes :: PandocMonad m => Block -> [Node] -> m [Node] -blockToNodes (Plain xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) -blockToNodes (Para xs) ns = return (node PARAGRAPH (inlinesToNodes xs) : ns) -blockToNodes (LineBlock lns) ns = blockToNodes (linesToPara lns) ns -blockToNodes (CodeBlock (_,classes,_) xs) ns = return $ - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) -blockToNodes (RawBlock fmt xs) ns - | fmt == Format "html" = return (node (HTML_BLOCK (T.pack xs)) [] : ns) - | otherwise = return (node (CUSTOM_BLOCK (T.pack xs) (T.empty)) [] : ns) -blockToNodes (BlockQuote bs) ns = do - nodes <- blocksToNodes bs - return (node BLOCK_QUOTE nodes : ns) -blockToNodes (BulletList items) ns = do - nodes <- mapM blocksToNodes items - return (node (LIST ListAttributes{ - listType = BULLET_LIST, - listDelim = PERIOD_DELIM, - listTight = isTightList items, - listStart = 1 }) (map (node ITEM) nodes) : ns) -blockToNodes (OrderedList (start, _sty, delim) items) ns = do - nodes <- mapM blocksToNodes items - return (node (LIST ListAttributes{ - listType = ORDERED_LIST, - listDelim = case delim of - OneParen -> PAREN_DELIM - TwoParens -> PAREN_DELIM - _ -> PERIOD_DELIM, - listTight = isTightList items, - listStart = start }) (map (node ITEM) nodes) : ns) -blockToNodes HorizontalRule ns = return (node THEMATIC_BREAK [] : ns) -blockToNodes (Header lev _ ils) ns = return (node (HEADING lev) (inlinesToNodes ils) : ns) -blockToNodes (Div _ bs) ns = do - nodes <- blocksToNodes bs - return (nodes ++ ns) -blockToNodes (DefinitionList items) ns = blockToNodes (BulletList items') ns - where items' = map dlToBullet items - dlToBullet (term, ((Para xs : ys) : zs)) = - Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, ((Plain xs : ys) : zs)) = - Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs - dlToBullet (term, xs) = - Para term : concat xs -blockToNodes t@(Table _ _ _ _ _) ns = do - s <- writeHtml5String def $! Pandoc nullMeta [t] - return (node (HTML_BLOCK (T.pack $! s)) [] : ns) -blockToNodes Null ns = return ns - -inlinesToNodes :: [Inline] -> [Node] -inlinesToNodes = foldr inlineToNodes [] - -inlineToNodes :: Inline -> [Node] -> [Node] -inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) -inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) -inlineToNodes LineBreak = (node LINEBREAK [] :) -inlineToNodes SoftBreak = (node SOFTBREAK [] :) -inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) -inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) -inlineToNodes (Strikeout xs) = - ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</s>")) []]) ++ ) -inlineToNodes (Superscript xs) = - ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</sup>")) []]) ++ ) -inlineToNodes (Subscript xs) = - ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</sub>")) []]) ++ ) -inlineToNodes (SmallCaps xs) = - ((node (HTML_INLINE (T.pack "<span style=\"font-variant:small-caps;\">")) [] - : inlinesToNodes xs ++ - [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) -inlineToNodes (Link _ ils (url,tit)) = - (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (Image _ ils (url,tit)) = - (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) -inlineToNodes (RawInline fmt xs) - | fmt == Format "html" = (node (HTML_INLINE (T.pack xs)) [] :) - | otherwise = (node (CUSTOM_INLINE (T.pack xs) (T.empty)) [] :) -inlineToNodes (Quoted qt ils) = - ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) - where (start, end) = case qt of - SingleQuote -> (T.pack "‘", T.pack "’") - DoubleQuote -> (T.pack "“", T.pack "”") -inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) -inlineToNodes (Math mt str) = - case mt of - InlineMath -> - (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) - DisplayMath -> - (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) -inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) -inlineToNodes (Note _) = id -- should not occur --- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs deleted file mode 100644 index ea8b90db3..000000000 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ /dev/null @@ -1,481 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into ConTeXt. --} -module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Walk (query) -import Text.Printf ( printf ) -import Data.List ( intercalate, intersperse ) -import Data.Char ( ord ) -import Data.Maybe ( catMaybes ) -import Control.Monad.State -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Network.URI ( isURI, unEscapeString ) -import Text.Pandoc.Class (PandocMonad) - -data WriterState = - WriterState { stNextRef :: Int -- number of next URL reference - , stOrderedListLevel :: Int -- level of ordered list - , stOptions :: WriterOptions -- writer options - } - -orderedListStyles :: [Char] -orderedListStyles = cycle "narg" - --- | Convert Pandoc to ConTeXt. -writeConTeXt :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeConTeXt options document = return $ - let defaultWriterState = WriterState { stNextRef = 1 - , stOrderedListLevel = 0 - , stOptions = options - } - in evalState (pandocToConTeXt options document) defaultWriterState - -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String -pandocToConTeXt options (Pandoc meta blocks) = do - let colwidth = if writerWrapText options == WrapAuto - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToConTeXt) - (fmap (render colwidth) . inlineListToConTeXt) - meta - body <- mapM (elementToConTeXt options) $ hierarchicalize blocks - let main = (render colwidth . vcat) body - let layoutFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> - ((x ++ "=") ++) <$> getField y metadata) - [("leftmargin","margin-left") - ,("rightmargin","margin-right") - ,("top","margin-top") - ,("bottom","margin-bottom") - ] - let context = defField "toc" (writerTableOfContents options) - $ defField "placelist" (intercalate ("," :: String) $ - take (writerTOCDepth options + - case writerTopLevelDivision options of - TopLevelPart -> 0 - TopLevelChapter -> 0 - _ -> 1) - ["chapter","section","subsection","subsubsection", - "subsubsubsection","subsubsubsubsection"]) - $ defField "body" main - $ defField "layout" layoutFromMargins - $ defField "number-sections" (writerNumberSections options) - $ metadata - let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $ - getField "lang" context) - $ defField "context-dir" (toContextDir $ getField "dir" context) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' - -toContextDir :: Maybe String -> String -toContextDir (Just "rtl") = "r2l" -toContextDir (Just "ltr") = "l2r" -toContextDir _ = "" - --- | escape things as needed for ConTeXt -escapeCharForConTeXt :: WriterOptions -> Char -> String -escapeCharForConTeXt opts ch = - let ligatures = isEnabled Ext_smart opts in - case ch of - '{' -> "\\{" - '}' -> "\\}" - '\\' -> "\\letterbackslash{}" - '$' -> "\\$" - '|' -> "\\letterbar{}" - '%' -> "\\letterpercent{}" - '~' -> "\\lettertilde{}" - '#' -> "\\#" - '[' -> "{[}" - ']' -> "{]}" - '\160' -> "~" - '\x2014' | ligatures -> "---" - '\x2013' | ligatures -> "--" - '\x2019' | ligatures -> "'" - '\x2026' -> "\\ldots{}" - x -> [x] - --- | Escape string for ConTeXt -stringToConTeXt :: WriterOptions -> String -> String -stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) - --- | Sanitize labels -toLabel :: String -> String -toLabel z = concatMap go z - where go x - | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) - | otherwise = [x] - --- | Convert Elements to ConTeXt -elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc -elementToConTeXt _ (Blk block) = blockToConTeXt block -elementToConTeXt opts (Sec level _ attr title' elements) = do - header' <- sectionHeader attr level title' - innerContents <- mapM (elementToConTeXt opts) elements - return $ vcat (header' : innerContents) - --- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block - -> State WriterState Doc -blockToConTeXt Null = return empty -blockToConTeXt (Plain lst) = inlineListToConTeXt lst --- title beginning with fig: indicates that the image is a figure -blockToConTeXt (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do - capt <- inlineListToConTeXt txt - img <- inlineToConTeXt (Image attr txt (src, "")) - let (ident, _, _) = attr - label = if null ident - then empty - else "[]" <> brackets (text $ toLabel ident) - return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline -blockToConTeXt (Para lst) = do - contents <- inlineListToConTeXt lst - return $ contents <> blankline -blockToConTeXt (LineBlock lns) = do - doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns - return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline -blockToConTeXt (BlockQuote lst) = do - contents <- blockListToConTeXt lst - return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline -blockToConTeXt (CodeBlock _ str) = - return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline - -- blankline because \stoptyping can't have anything after it, inc. '}' -blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline -blockToConTeXt (RawBlock _ _ ) = return empty -blockToConTeXt (Div (ident,_,kvs) bs) = do - let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" - let wrapRef txt = if null ident - then txt - else ("\\reference" <> brackets (text $ toLabel ident) <> - braces empty <> "%") $$ txt - wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "righttoleft" - Just "ltr" -> align "lefttoright" - _ -> id - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" - <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop" - Nothing -> txt - wrapBlank txt = blankline <> txt <> blankline - fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs -blockToConTeXt (BulletList lst) = do - contents <- mapM listItemToConTeXt lst - return $ ("\\startitemize" <> if isTightList lst - then brackets "packed" - else empty) $$ - vcat contents $$ text "\\stopitemize" <> blankline -blockToConTeXt (OrderedList (start, style', delim) lst) = do - st <- get - let level = stOrderedListLevel st - put $ st {stOrderedListLevel = level + 1} - contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} - let start' = if start == 1 then "" else "start=" ++ show start - let delim' = case delim of - DefaultDelim -> "" - Period -> "stopper=." - OneParen -> "stopper=)" - TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) - (orderedListMarkers (start, style', delim)) - let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" - else "" - let specs2Items = filter (not . null) [start', delim', width''] - let specs2 = if null specs2Items - then "" - else "[" ++ intercalate "," specs2Items ++ "]" - let style'' = '[': (case style' of - DefaultStyle -> orderedListStyles !! level - Decimal -> 'n' - Example -> 'n' - LowerRoman -> 'r' - UpperRoman -> 'R' - LowerAlpha -> 'a' - UpperAlpha -> 'A') : - if isTightList lst then ",packed]" else "]" - let specs = style'' ++ specs2 - return $ "\\startitemize" <> text specs $$ vcat contents $$ - "\\stopitemize" <> blankline -blockToConTeXt (DefinitionList lst) = - liftM vcat $ mapM defListItemToConTeXt lst -blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline --- If this is ever executed, provide a default for the reference identifier. -blockToConTeXt (Header level attr lst) = sectionHeader attr level lst -blockToConTeXt (Table caption aligns widths heads rows) = do - let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' - AlignRight -> 'r' - AlignCenter -> 'c' - AlignDefault -> 'l'): - if colWidth == 0 - then "|" - else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") - let colDescriptors = "|" ++ (concat $ - zipWith colDescriptor widths aligns) - headers <- if all null heads - then return empty - else liftM ($$ "\\HL") $ tableRowToConTeXt heads - captionText <- inlineListToConTeXt caption - rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> (if null caption - then brackets "none" - else empty) - <> braces captionText $$ - "\\starttable" <> brackets (text colDescriptors) $$ - "\\HL" $$ headers $$ - vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline - -tableRowToConTeXt :: [[Block]] -> State WriterState Doc -tableRowToConTeXt cols = do - cols' <- mapM blockListToConTeXt cols - return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR" - -listItemToConTeXt :: [Block] -> State WriterState Doc -listItemToConTeXt list = blockListToConTeXt list >>= - return . ("\\item" $$) . (nest 2) - -defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc -defListItemToConTeXt (term, defs) = do - term' <- inlineListToConTeXt term - def' <- liftM vsep $ mapM blockListToConTeXt defs - return $ "\\startdescription" <> braces term' $$ nest 2 def' $$ - "\\stopdescription" <> blankline - --- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState Doc -blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst - --- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst - -- We add a \strut after a line break that precedes a space, - -- or the space gets swallowed - where addStruts (LineBreak : s : xs) | isSpacey s = - LineBreak : RawInline (Format "context") "\\strut " : s : - addStruts xs - addStruts (x:xs) = x : addStruts xs - addStruts [] = [] - isSpacey Space = True - isSpacey (Str ('\160':_)) = True - isSpacey _ = False - --- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline -- ^ Inline to convert - -> State WriterState Doc -inlineToConTeXt (Emph lst) = do - contents <- inlineListToConTeXt lst - return $ braces $ "\\em " <> contents -inlineToConTeXt (Strong lst) = do - contents <- inlineListToConTeXt lst - return $ braces $ "\\bf " <> contents -inlineToConTeXt (Strikeout lst) = do - contents <- inlineListToConTeXt lst - return $ "\\overstrikes" <> braces contents -inlineToConTeXt (Superscript lst) = do - contents <- inlineListToConTeXt lst - return $ "\\high" <> braces contents -inlineToConTeXt (Subscript lst) = do - contents <- inlineListToConTeXt lst - return $ "\\low" <> braces contents -inlineToConTeXt (SmallCaps lst) = do - contents <- inlineListToConTeXt lst - return $ braces $ "\\sc " <> contents -inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) = - return $ "\\type" <> braces (text str) -inlineToConTeXt (Code _ str) = do - opts <- gets stOptions - return $ "\\mono" <> braces (text $ stringToConTeXt opts str) -inlineToConTeXt (Quoted SingleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ "\\quote" <> braces contents -inlineToConTeXt (Quoted DoubleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ "\\quotation" <> braces contents -inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst -inlineToConTeXt (Str str) = do - opts <- gets stOptions - return $ text $ stringToConTeXt opts str -inlineToConTeXt (Math InlineMath str) = - return $ char '$' <> text str <> char '$' -inlineToConTeXt (Math DisplayMath str) = - return $ text "\\startformula " <> text str <> text " \\stopformula" <> space -inlineToConTeXt (RawInline "context" str) = return $ text str -inlineToConTeXt (RawInline "tex" str) = return $ text str -inlineToConTeXt (RawInline _ _) = return empty -inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr -inlineToConTeXt SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - return $ case wrapText of - WrapAuto -> space - WrapNone -> space - WrapPreserve -> cr -inlineToConTeXt Space = return space --- Handle HTML-like internal document references to sections -inlineToConTeXt (Link _ txt (('#' : ref), _)) = do - opts <- gets stOptions - contents <- inlineListToConTeXt txt - let ref' = toLabel $ stringToConTeXt opts ref - return $ text "\\goto" - <> braces contents - <> brackets (text ref') - -inlineToConTeXt (Link _ txt (src, _)) = do - let isAutolink = txt == [Str (unEscapeString src)] - st <- get - let next = stNextRef st - put $ st {stNextRef = next + 1} - let ref = "url" ++ show next - contents <- inlineListToConTeXt txt - return $ "\\useURL" - <> brackets (text ref) - <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) - <> (if isAutolink - then empty - else brackets empty <> brackets contents) - <> "\\from" - <> brackets (text ref) -inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do - opts <- gets stOptions - let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of - Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] - Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] - Just dim -> - [d <> text (show dim)] - Nothing -> - [] - dimList = showDim Width ++ showDim Height - dims = if null dimList - then empty - else brackets $ cat (intersperse "," dimList) - clas = if null cls - then empty - else brackets $ text $ toLabel $ head cls - src' = if isURI src - then src - else unEscapeString src - return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas -inlineToConTeXt (Note contents) = do - contents' <- blockListToConTeXt contents - let codeBlock x@(CodeBlock _ _) = [x] - codeBlock _ = [] - let codeBlocks = query codeBlock contents - return $ if null codeBlocks - then text "\\footnote{" <> nest 2 contents' <> char '}' - else text "\\startbuffer " <> nest 2 contents' <> - text "\\stopbuffer\\footnote{\\getbuffer}" -inlineToConTeXt (Span (_,_,kvs) ils) = do - let wrapDir txt = case lookup "dir" kvs of - Just "rtl" -> braces $ "\\righttoleft " <> txt - Just "ltr" -> braces $ "\\lefttoright " <> txt - _ -> txt - wrapLang txt = case lookup "lang" kvs of - Just lng -> "\\start\\language[" <> text (fromBcp47' lng) - <> "]" <> txt <> "\\stop " - Nothing -> txt - fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils - --- | Craft the section header, inserting the section reference, if supplied. -sectionHeader :: Attr - -> Int - -> [Inline] - -> State WriterState Doc -sectionHeader (ident,classes,_) hdrLevel lst = do - contents <- inlineListToConTeXt lst - st <- get - let opts = stOptions st - let level' = case writerTopLevelDivision opts of - TopLevelPart -> hdrLevel - 2 - TopLevelChapter -> hdrLevel - 1 - TopLevelSection -> hdrLevel - TopLevelDefault -> hdrLevel - let ident' = toLabel ident - let (section, chapter) = if "unnumbered" `elem` classes - then (text "subject", text "title") - else (text "section", text "chapter") - return $ case level' of - -1 -> text "\\part" <> braces contents - 0 -> char '\\' <> chapter <> braces contents - n | n >= 1 && n <= 5 -> char '\\' - <> text (concat (replicate (n - 1) "sub")) - <> section - <> (if (not . null) ident' - then brackets (text ident') - else empty) - <> braces contents - <> blankline - _ -> contents <> blankline - -fromBcp47' :: String -> String -fromBcp47' = fromBcp47 . splitBy (=='-') - --- Takes a list of the constituents of a BCP 47 language code --- and irons out ConTeXt's exceptions --- https://tools.ietf.org/html/bcp47#section-2.1 --- http://wiki.contextgarden.net/Language_Codes -fromBcp47 :: [String] -> String -fromBcp47 [] = "" -fromBcp47 ("ar":"SY":_) = "ar-sy" -fromBcp47 ("ar":"IQ":_) = "ar-iq" -fromBcp47 ("ar":"JO":_) = "ar-jo" -fromBcp47 ("ar":"LB":_) = "ar-lb" -fromBcp47 ("ar":"DZ":_) = "ar-dz" -fromBcp47 ("ar":"MA":_) = "ar-ma" -fromBcp47 ("de":"1901":_) = "deo" -fromBcp47 ("de":"DE":_) = "de-de" -fromBcp47 ("de":"AT":_) = "de-at" -fromBcp47 ("de":"CH":_) = "de-ch" -fromBcp47 ("el":"poly":_) = "agr" -fromBcp47 ("en":"US":_) = "en-us" -fromBcp47 ("en":"GB":_) = "en-gb" -fromBcp47 ("grc":_) = "agr" -fromBcp47 x = fromIso $ head x - where - fromIso "el" = "gr" - fromIso "eu" = "ba" - fromIso "he" = "il" - fromIso "jp" = "ja" - fromIso "uk" = "ua" - fromIso "vi" = "vn" - fromIso "zh" = "cn" - fromIso l = l diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs deleted file mode 100644 index cf641dcd6..000000000 --- a/src/Text/Pandoc/Writers/Custom.hs +++ /dev/null @@ -1,322 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE FlexibleInstances, OverloadedStrings, - ScopedTypeVariables, DeriveDataTypeable, CPP #-} -#if MIN_VERSION_base(4,8,0) -#else -{-# LANGUAGE OverlappingInstances #-} -#endif -{- Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to custom markup using -a lua writer. --} -module Text.Pandoc.Writers.Custom ( writeCustom ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Data.List ( intersperse ) -import Data.Char ( toLower ) -import Data.Typeable -import Scripting.Lua (LuaState, StackValue, callfunc) -import Text.Pandoc.Writers.Shared -import qualified Scripting.Lua as Lua -import qualified Text.Pandoc.UTF8 as UTF8 -import Control.Monad (when) -import Control.Exception -import qualified Data.Map as M -import Text.Pandoc.Templates -import GHC.IO.Encoding (getForeignEncoding,setForeignEncoding, utf8) - -attrToMap :: Attr -> M.Map String String -attrToMap (id',classes,keyvals) = M.fromList - $ ("id", id') - : ("class", unwords classes) - : keyvals - -#if MIN_VERSION_hslua(0,4,0) -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Char] where -#else -instance StackValue [Char] where -#endif - push lua cs = Lua.push lua (UTF8.fromString cs) - peek lua i = do - res <- Lua.peek lua i - return $ UTF8.toString `fmap` res - valuetype _ = Lua.TSTRING -#else -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue a => StackValue [a] where -#else -instance StackValue a => StackValue [a] where -#endif - push lua xs = do - Lua.createtable lua (length xs + 1) 0 - let addValue (i, x) = Lua.push lua x >> Lua.rawseti lua (-2) i - mapM_ addValue $ zip [1..] xs - peek lua i = do - top <- Lua.gettop lua - let i' = if i < 0 then top + i + 1 else i - Lua.pushnil lua - lst <- getList lua i' - Lua.pop lua 1 - return (Just lst) - valuetype _ = Lua.TTABLE - -getList :: StackValue a => LuaState -> Int -> IO [a] -getList lua i' = do - continue <- Lua.next lua i' - if continue - then do - next <- Lua.peek lua (-1) - Lua.pop lua 1 - x <- maybe (fail "peek returned Nothing") return next - rest <- getList lua i' - return (x : rest) - else return [] -#endif - -instance StackValue Format where - push lua (Format f) = Lua.push lua (map toLower f) - peek l n = fmap Format `fmap` Lua.peek l n - valuetype _ = Lua.TSTRING - -instance (StackValue a, StackValue b) => StackValue (M.Map a b) where - push lua m = do - let xs = M.toList m - Lua.createtable lua (length xs + 1) 0 - let addValue (k, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - mapM_ addValue xs - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -instance (StackValue a, StackValue b) => StackValue (a,b) where - push lua (k,v) = do - Lua.createtable lua 2 0 - Lua.push lua k - Lua.push lua v - Lua.rawset lua (-3) - peek _ _ = undefined -- not needed for our purposes - valuetype _ = Lua.TTABLE - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Inline] where -#else -instance StackValue [Inline] where -#endif - push l ils = Lua.push l =<< inlineListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -#if MIN_VERSION_base(4,8,0) -instance {-# OVERLAPS #-} StackValue [Block] where -#else -instance StackValue [Block] where -#endif - push l ils = Lua.push l =<< blockListToCustom l ils - peek _ _ = undefined - valuetype _ = Lua.TSTRING - -instance StackValue MetaValue where - push l (MetaMap m) = Lua.push l m - push l (MetaList xs) = Lua.push l xs - push l (MetaBool x) = Lua.push l x - push l (MetaString s) = Lua.push l s - push l (MetaInlines ils) = Lua.push l ils - push l (MetaBlocks bs) = Lua.push l bs - peek _ _ = undefined - valuetype (MetaMap _) = Lua.TTABLE - valuetype (MetaList _) = Lua.TTABLE - valuetype (MetaBool _) = Lua.TBOOLEAN - valuetype (MetaString _) = Lua.TSTRING - valuetype (MetaInlines _) = Lua.TSTRING - valuetype (MetaBlocks _) = Lua.TSTRING - -instance StackValue Citation where - push lua cit = do - Lua.createtable lua 6 0 - let addValue (k :: String, v) = Lua.push lua k >> Lua.push lua v >> - Lua.rawset lua (-3) - addValue ("citationId", citationId cit) - addValue ("citationPrefix", citationPrefix cit) - addValue ("citationSuffix", citationSuffix cit) - addValue ("citationMode", show (citationMode cit)) - addValue ("citationNoteNum", citationNoteNum cit) - addValue ("citationHash", citationHash cit) - peek = undefined - valuetype _ = Lua.TTABLE - -data PandocLuaException = PandocLuaException String - deriving (Show, Typeable) - -instance Exception PandocLuaException - --- | Convert Pandoc to custom markup. -writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String -writeCustom luaFile opts doc@(Pandoc meta _) = do - luaScript <- UTF8.readFile luaFile - enc <- getForeignEncoding - setForeignEncoding utf8 - lua <- Lua.newstate - Lua.openlibs lua - status <- Lua.loadstring lua luaScript luaFile - -- check for error in lua script (later we'll change the return type - -- to handle this more gracefully): - when (status /= 0) $ -#if MIN_VERSION_hslua(0,4,0) - Lua.tostring lua 1 >>= throw . PandocLuaException . UTF8.toString -#else - Lua.tostring lua 1 >>= throw . PandocLuaException -#endif - Lua.call lua 0 0 - -- TODO - call hierarchicalize, so we have that info - rendered <- docToCustom lua opts doc - context <- metaToJSON opts - (blockListToCustom lua) - (inlineListToCustom lua) - meta - Lua.close lua - setForeignEncoding enc - let body = rendered - case writerTemplate opts of - Nothing -> return body - Just tpl -> return $ renderTemplate' tpl $ setField "body" body context - -docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String -docToCustom lua opts (Pandoc (Meta metamap) blocks) = do - body <- blockListToCustom lua blocks - callfunc lua "Doc" body metamap (writerVariables opts) - --- | Convert Pandoc block element to Custom. -blockToCustom :: LuaState -- ^ Lua state - -> Block -- ^ Block element - -> IO String - -blockToCustom _ Null = return "" - -blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines - -blockToCustom lua (Para [Image attr txt (src,tit)]) = - callfunc lua "CaptionedImage" src tit txt (attrToMap attr) - -blockToCustom lua (Para inlines) = callfunc lua "Para" inlines - -blockToCustom lua (LineBlock linesList) = callfunc lua "LineBlock" linesList - -blockToCustom lua (RawBlock format str) = - callfunc lua "RawBlock" format str - -blockToCustom lua HorizontalRule = callfunc lua "HorizontalRule" - -blockToCustom lua (Header level attr inlines) = - callfunc lua "Header" level inlines (attrToMap attr) - -blockToCustom lua (CodeBlock attr str) = - callfunc lua "CodeBlock" str (attrToMap attr) - -blockToCustom lua (BlockQuote blocks) = callfunc lua "BlockQuote" blocks - -blockToCustom lua (Table capt aligns widths headers rows') = - callfunc lua "Table" capt (map show aligns) widths headers rows' - -blockToCustom lua (BulletList items) = callfunc lua "BulletList" items - -blockToCustom lua (OrderedList (num,sty,delim) items) = - callfunc lua "OrderedList" items num (show sty) (show delim) - -blockToCustom lua (DefinitionList items) = - callfunc lua "DefinitionList" items - -blockToCustom lua (Div attr items) = - callfunc lua "Div" items (attrToMap attr) - --- | Convert list of Pandoc block elements to Custom. -blockListToCustom :: LuaState -- ^ Options - -> [Block] -- ^ List of block elements - -> IO String -blockListToCustom lua xs = do - blocksep <- callfunc lua "Blocksep" - bs <- mapM (blockToCustom lua) xs - return $ mconcat $ intersperse blocksep bs - --- | Convert list of Pandoc inline elements to Custom. -inlineListToCustom :: LuaState -> [Inline] -> IO String -inlineListToCustom lua lst = do - xs <- mapM (inlineToCustom lua) lst - return $ concat xs - --- | Convert Pandoc inline element to Custom. -inlineToCustom :: LuaState -> Inline -> IO String - -inlineToCustom lua (Str str) = callfunc lua "Str" str - -inlineToCustom lua Space = callfunc lua "Space" - -inlineToCustom lua SoftBreak = callfunc lua "SoftBreak" - -inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst - -inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst - -inlineToCustom lua (Strikeout lst) = callfunc lua "Strikeout" lst - -inlineToCustom lua (Superscript lst) = callfunc lua "Superscript" lst - -inlineToCustom lua (Subscript lst) = callfunc lua "Subscript" lst - -inlineToCustom lua (SmallCaps lst) = callfunc lua "SmallCaps" lst - -inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst - -inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst - -inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs - -inlineToCustom lua (Code attr str) = - callfunc lua "Code" str (attrToMap attr) - -inlineToCustom lua (Math DisplayMath str) = - callfunc lua "DisplayMath" str - -inlineToCustom lua (Math InlineMath str) = - callfunc lua "InlineMath" str - -inlineToCustom lua (RawInline format str) = - callfunc lua "RawInline" format str - -inlineToCustom lua (LineBreak) = callfunc lua "LineBreak" - -inlineToCustom lua (Link attr txt (src,tit)) = - callfunc lua "Link" txt src tit (attrToMap attr) - -inlineToCustom lua (Image attr alt (src,tit)) = - callfunc lua "Image" alt src tit (attrToMap attr) - -inlineToCustom lua (Note contents) = callfunc lua "Note" contents - -inlineToCustom lua (Span attr items) = - callfunc lua "Span" items (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs deleted file mode 100644 index 597851f65..000000000 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ /dev/null @@ -1,440 +0,0 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Docbook XML. --} -module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where -import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Shared -import Text.Pandoc.Walk -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.Math -import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) -import Data.Char ( toLower ) -import Data.Monoid ( Any(..) ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B -import Text.TeXMath -import qualified Text.XML.Light as Xml -import Data.Generics (everywhere, mkT) -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging -import Control.Monad.Reader - -data DocBookVersion = DocBook4 | DocBook5 - deriving (Eq, Show) - -type DB = ReaderT DocBookVersion - --- | Convert list of authors to a docbook <author> section -authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines -authorToDocbook opts name' = do - name <- render Nothing <$> inlinesToDocbook opts name' - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - return $ B.rawInline "docbook" $ render colwidth $ - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = triml rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) - -writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeDocbook4 opts d = - runReaderT (writeDocbook opts d) DocBook4 - -writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeDocbook5 opts d = - runReaderT (writeDocbook opts d) DocBook5 - --- | Convert Pandoc document to string in Docbook format. -writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m String -writeDocbook opts (Pandoc meta blocks) = do - let elements = hierarchicalize blocks - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - let render' = render colwidth - let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr) - (writerTemplate opts) && - TopLevelDefault == writerTopLevelDivision opts) - then opts{ writerTopLevelDivision = TopLevelChapter } - else opts - -- The numbering here follows LaTeX's internal numbering - let startLvl = case writerTopLevelDivision opts' of - TopLevelPart -> -1 - TopLevelChapter -> 0 - TopLevelSection -> 1 - TopLevelDefault -> 1 - auths' <- mapM (authorToDocbook opts) $ docAuthors meta - let meta' = B.setMeta "author" auths' meta - metadata <- metaToJSON opts - (fmap (render colwidth . vcat) . - (mapM (elementToDocbook opts' startLvl) . - hierarchicalize)) - (fmap (render colwidth) . inlinesToDocbook opts') - meta' - main <- (render' . vcat) <$> (mapM (elementToDocbook opts' startLvl) elements) - let context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML -> True - _ -> False) - $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - --- | Convert an Element to Docbook. -elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc -elementToDocbook opts _ (Blk block) = blockToDocbook opts block -elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = do - version <- ask - -- Docbook doesn't allow sections with no content, so insert some if needed - let elements' = if null elements - then [Blk (Para [])] - else elements - tag = case lvl of - -1 -> "part" - 0 -> "chapter" - n | n >= 1 && n <= 5 -> if version == DocBook5 - then "section" - else "sect" ++ show n - _ -> "simplesect" - idName = if version == DocBook5 - then "xml:id" - else "id" - idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] - nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] - else [] - attribs = nsAttr ++ idAttr - contents <- mapM (elementToDocbook opts (lvl + 1)) elements' - title' <- inlinesToDocbook opts title - return $ inTags True tag attribs $ - inTagsSimple "title" title' $$ vcat contents - --- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc -blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts) - --- | Auxiliary function to convert Plain block to Para. -plainToPara :: Block -> Block -plainToPara (Plain x) = Para x -plainToPara x = x - --- | Convert a list of pairs of terms and definitions into a list of --- Docbook varlistentrys. -deflistItemsToDocbook :: PandocMonad m - => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc -deflistItemsToDocbook opts items = - vcat <$> mapM (\(term, defs) -> deflistItemToDocbook opts term defs) items - --- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: PandocMonad m - => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc -deflistItemToDocbook opts term defs = do - term' <- inlinesToDocbook opts term - def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs - return $ inTagsIndented "varlistentry" $ - inTagsIndented "term" term' $$ - inTagsIndented "listitem" def' - --- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m Doc -listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items - --- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc -listItemToDocbook opts item = - inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) - -imageToDocbook :: WriterOptions -> Attr -> String -> Doc -imageToDocbook _ attr src = selfClosingTag "imagedata" $ - ("fileref", src) : idAndRole attr ++ dims - where - dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of - Just a -> [(dstr, show a)] - Nothing -> [] - --- | Convert a Pandoc block element to Docbook. -blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m Doc -blockToDocbook _ Null = return empty --- Add ids to paragraphs in divs with ids - this is needed for --- pandoc-citeproc to get link anchors in bibliographies: -blockToDocbook opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in - if hasLineBreaks lst - then (flush . nowrap . inTags False "literallayout" attribs) - <$> inlinesToDocbook opts lst - else inTags True "para" attribs <$> inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = do - contents <- blocksToDocbook opts (map plainToPara bs) - return $ - (if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ contents -blockToDocbook _ (Header _ _ _) = - return empty -- should not occur after hierarchicalize -blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst --- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do - alt <- inlinesToDocbook opts txt - let capt = if null txt - then empty - else inTagsSimple "title" alt - return $ inTagsIndented "figure" $ - capt $$ - (inTagsIndented "mediaobject" $ - (inTagsIndented "imageobject" - (imageToDocbook opts attr src)) $$ - inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToDocbook opts (Para lst) - | hasLineBreaks lst = (flush . nowrap . inTagsSimple "literallayout") - <$> inlinesToDocbook opts lst - | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst -blockToDocbook opts (LineBlock lns) = - blockToDocbook opts $ linesToPara lns -blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" <$> blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ - text ("<programlisting" ++ lang ++ ">") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") - where lang = if null langs - then "" - else " language=\"" ++ escapeStringForXML (head langs) ++ - "\"" - isLang l = map toLower l `elem` map (map toLower) languages - langsFrom s = if isLang s - then [s] - else languagesByExtension . map toLower $ s - langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = do - let attribs = [("spacing", "compact") | isTightList lst] - inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = return empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do - let numeration = case numstyle of - DefaultStyle -> [] - Decimal -> [("numeration", "arabic")] - Example -> [("numeration", "arabic")] - UpperAlpha -> [("numeration", "upperalpha")] - LowerAlpha -> [("numeration", "loweralpha")] - UpperRoman -> [("numeration", "upperroman")] - LowerRoman -> [("numeration", "lowerroman")] - spacing = [("spacing", "compact") | isTightList (first:rest)] - attribs = numeration ++ spacing - items <- if start == 1 - then listItemsToDocbook opts (first:rest) - else do - first' <- blocksToDocbook opts (map plainToPara first) - rest' <- listItemsToDocbook opts rest - return $ - (inTags True "listitem" [("override",show start)] first') $$ - rest' - return $ inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = do - let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] - inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst -blockToDocbook _ b@(RawBlock f str) - | f == "docbook" = return $ text str -- raw XML block - | f == "html" = do - version <- ask - if version == DocBook5 - then return empty -- No html in Docbook5 - else return $ text str -- allow html for backwards compatibility - | otherwise = do - report $ BlockNotRendered b - return empty -blockToDocbook _ HorizontalRule = return empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = do - captionDoc <- if null caption - then return empty - else inTagsIndented "title" <$> - inlinesToDocbook opts caption - let tableType = if isEmpty captionDoc then "informaltable" else "table" - percent w = show (truncate (100*w) :: Integer) ++ "*" - coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" - ([("colwidth", percent w) | w > 0] ++ - [("align", alignmentToString al)])) widths aligns - head' <- if all null headers - then return empty - else inTagsIndented "thead" <$> tableRowToDocbook opts headers - body' <- (inTagsIndented "tbody" . vcat) <$> - mapM (tableRowToDocbook opts) rows - return $ inTagsIndented tableType $ captionDoc $$ - (inTags True "tgroup" [("cols", show (length headers))] $ - coltags $$ head' $$ body') - -hasLineBreaks :: [Inline] -> Bool -hasLineBreaks = getAny . query isLineBreak . walk removeNote - where - removeNote :: Inline -> Inline - removeNote (Note _) = Str "" - removeNote x = x - isLineBreak :: Inline -> Any - isLineBreak LineBreak = Any True - isLineBreak _ = Any False - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToDocbook :: PandocMonad m - => WriterOptions - -> [[Block]] - -> DB m Doc -tableRowToDocbook opts cols = - (inTagsIndented "row" . vcat) <$> mapM (tableItemToDocbook opts) cols - -tableItemToDocbook :: PandocMonad m - => WriterOptions - -> [Block] - -> DB m Doc -tableItemToDocbook opts item = - (inTags True "entry" [] . vcat) <$> mapM (blockToDocbook opts) item - --- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc -inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst - --- | Convert an inline element to Docbook. -inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m Doc -inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] <$> - inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" <$> inlinesToDocbook opts lst -inlineToDocbook opts (SmallCaps lst) = - inTags False "emphasis" [("role", "smallcaps")] <$> - inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Cite _ lst) = - inlinesToDocbook opts lst -inlineToDocbook opts (Span (ident,_,_) ils) = - ((if null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) <>) <$> - inlinesToDocbook opts ils -inlineToDocbook _ (Code _ str) = - return $ inTagsSimple "literal" $ text (escapeStringForXML str) -inlineToDocbook opts (Math t str) - | isMathML (writerHTMLMathMethod opts) = do - res <- convertMath writeMathML t str - case res of - Right r -> return $ inTagsSimple tagtype - $ text $ Xml.ppcElement conf - $ fixNS - $ removeAttr r - Left il -> inlineToDocbook opts il - | otherwise = - texMathToInlines t str >>= inlinesToDocbook opts - where tagtype = case t of - InlineMath -> "inlineequation" - DisplayMath -> "informalequation" - conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP - removeAttr e = e{ Xml.elAttribs = [] } - fixNS' qname = qname{ Xml.qPrefix = Just "mml" } - fixNS = everywhere (mkT fixNS') -inlineToDocbook _ il@(RawInline f x) - | f == "html" || f == "docbook" = return $ text x - | otherwise = do - report $ InlineNotRendered il - return empty -inlineToDocbook _ LineBreak = return $ text "\n" --- currently ignore, would require the option to add custom --- styles to the document -inlineToDocbook _ Space = return space --- because we use \n for LineBreak, we can't do soft breaks: -inlineToDocbook _ SoftBreak = return space -inlineToDocbook opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = - let emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ email - in case txt of - [Str s] | escapeURI s == email -> return emailLink - _ -> do contents <- inlinesToDocbook opts txt - return $ contents <+> - char '(' <> emailLink <> char ')' - | otherwise = do - version <- ask - (if isPrefixOf "#" src - then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr - else if version == DocBook5 - then inTags False "link" $ ("xlink:href", src) : idAndRole attr - else inTags False "ulink" $ ("url", src) : idAndRole attr ) - <$> inlinesToDocbook opts txt -inlineToDocbook opts (Image attr _ (src, tit)) = return $ - let titleDoc = if null tit - then empty - else inTagsIndented "objectinfo" $ - inTagsIndented "title" (text $ escapeStringForXML tit) - in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ imageToDocbook opts attr src -inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" <$> blocksToDocbook opts contents - -isMathML :: HTMLMathMethod -> Bool -isMathML MathML = True -isMathML _ = False - -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role - where - ident = if null id' - then [] - else [("id", id')] - role = if null cls - then [] - else [("role", unwords cls)] diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs deleted file mode 100644 index 56aa29211..000000000 --- a/src/Text/Pandoc/Writers/Docx.hs +++ /dev/null @@ -1,1302 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns, DeriveFunctor #-} -{- -Copyright (C) 2012-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to docx. --} -module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) -import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 -import qualified Data.Map as M -import qualified Data.Set as Set -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip -import Data.Time.Clock.POSIX -import Text.Pandoc.Compat.Time -import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.ImageSize -import Text.Pandoc.Shared hiding (Element) -import Text.Pandoc.Writers.Shared (fixDisplayMath) -import Text.Pandoc.Options -import Text.Pandoc.Writers.Math -import Text.Pandoc.Highlighting ( highlight ) -import Text.Pandoc.Walk -import Text.Pandoc.Error (PandocError) -import Text.XML.Light as XML -import Text.TeXMath -import Text.Pandoc.Readers.Docx.StyleMap -import Control.Monad.Reader -import Control.Monad.State -import Skylighting -import Control.Monad.Except (runExceptT) -import System.Random (randomR) -import Text.Printf (printf) -import Data.Monoid ((<>)) -import qualified Data.Text as T -import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, - extensionFromMimeType) -import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe, mapMaybe, maybeToList, isNothing) -import Data.Char (ord, isSpace, toLower) -import Text.Pandoc.Class (PandocMonad, report) -import qualified Text.Pandoc.Class as P -import Text.Pandoc.Logging - -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - -listMarkerToId :: ListMarker -> String -listMarkerToId NoMarker = "990" -listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = - '9' : '9' : styNum : delimNum : show n - where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' - delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' - -data WriterEnv = WriterEnv{ envTextProperties :: [Element] - , envParaProperties :: [Element] - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: String - , envChangesDate :: String - , envPrintWidth :: Integer - } - -defaultWriterEnv :: WriterEnv -defaultWriterEnv = WriterEnv{ envTextProperties = [] - , envParaProperties = [] - , envRTL = False - , envListLevel = -1 - , envListNumId = 1 - , envInDel = False - , envChangesAuthor = "unknown" - , envChangesDate = "1969-12-31T19:00:00Z" - , envPrintWidth = 1 - } - -data WriterState = WriterState{ - stFootnotes :: [Element] - , stSectionIds :: Set.Set String - , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) - , stLists :: [ListMarker] - , stInsId :: Int - , stDelId :: Int - , stStyleMaps :: StyleMaps - , stFirstPara :: Bool - , stTocTitle :: [Inline] - , stDynamicParaProps :: [String] - , stDynamicTextProps :: [String] - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ - stFootnotes = defaultFootnotes - , stSectionIds = Set.empty - , stExternalLinks = M.empty - , stImages = M.empty - , stLists = [NoMarker] - , stInsId = 1 - , stDelId = 1 - , stStyleMaps = defaultStyleMaps - , stFirstPara = False - , stTocTitle = [Str "Table of Contents"] - , stDynamicParaProps = [] - , stDynamicTextProps = [] - } - -type WS m = ReaderT WriterEnv (StateT WriterState m) - -mknode :: Node t => String -> [(String,String)] -> t -> Element -mknode s attrs = - add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) - -nodename :: String -> QName -nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) - -toLazy :: B.ByteString -> BL.ByteString -toLazy = BL.fromChunks . (:[]) - -renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> - UTF8.fromStringLazy (showElement elt) - -renumIdMap :: Int -> [Element] -> M.Map String String -renumIdMap _ [] = M.empty -renumIdMap n (e:es) - | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) - | otherwise = renumIdMap n es - -replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] -replaceAttr _ _ [] = [] -replaceAttr f val (a:as) | f (attrKey a) = - (XML.Attr (attrKey a) val) : (replaceAttr f val as) - | otherwise = a : (replaceAttr f val as) - -renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element -renumId f renumMap e - | Just oldId <- findAttrBy f e - , Just newId <- M.lookup oldId renumMap = - let attrs' = replaceAttr f newId (elAttribs e) - in - e { elAttribs = attrs' } - | otherwise = e - -renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] -renumIds f renumMap = map (renumId f renumMap) - --- | Certain characters are invalid in XML even if escaped. --- See #1992 -stripInvalidChars :: String -> String -stripInvalidChars = filter isValidChar - --- | See XML reference -isValidChar :: Char -> Bool -isValidChar (ord -> c) - | c == 0x9 = True - | c == 0xA = True - | c == 0xD = True - | 0x20 <= c && c <= 0xD7FF = True - | 0xE000 <= c && c <= 0xFFFD = True - | 0x10000 <= c && c <= 0x10FFFF = True - | otherwise = False - -metaValueToInlines :: MetaValue -> [Inline] -metaValueToInlines (MetaString s) = [Str s] -metaValueToInlines (MetaInlines ils) = ils -metaValueToInlines (MetaBlocks bs) = query return bs -metaValueToInlines (MetaBool b) = [Str $ show b] -metaValueToInlines _ = [] - - - -writeDocx :: (PandocMonad m) - => WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> m BL.ByteString -writeDocx opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath $ doc - username <- P.lookupEnv "USERNAME" - utctime <- P.getCurrentTime - distArchive <- (toArchive . BL.fromStrict) <$> - P.readDataFile datadir "reference.docx" - refArchive <- case writerReferenceDoc opts of - Just f -> toArchive <$> P.readFileLazy f - Nothing -> return distArchive - - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc - - -- Gets the template size - let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) - let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) - - let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) - let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) - let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName)) - - -- Get the avaible area (converting the size and the margins to int and - -- doing the difference - let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) - <*> ( - (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) - <*> (read <$> mbAttrMarLeft ::Maybe Integer) - ) - - -- styles - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath - - -- parse styledoc for heading styles - let styleMaps = getStyleMaps styledoc - - let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ - metaValueToInlines <$> lookupMeta "toc-title" meta - - let initialSt = defaultWriterState { - stStyleMaps = styleMaps - , stTocTitle = tocTitle - } - - let isRTLmeta = case lookupMeta "dir" meta of - Just (MetaString "rtl") -> True - Just (MetaInlines [Str "rtl"]) -> True - _ -> False - - let env = defaultWriterEnv { - envRTL = isRTLmeta - , envChangesAuthor = fromMaybe "unknown" username - , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime - , envPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - } - - - ((contents, footnotes), st) <- runStateT - (runReaderT - (writeOpenXML opts{writerWrapText = WrapNone} doc') - env) - initialSt - let epochtime = floor $ utcTimeToPOSIXSeconds utctime - let imgs = M.elems $ stImages st - - -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img - let imageEntries = map toImageEntry imgs - - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - - - parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" - let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" - let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" - let headers = filterElements isHeaderNode parsedRels - let footers = filterElements isFooterNode parsedRels - - let extractTarget = findAttr (QName "Target" Nothing Nothing) - - -- we create [Content_Types].xml and word/_rels/document.xml.rels - -- from scratch rather than reading from reference.docx, - -- because Word sometimes changes these files when a reference.docx is modified, - -- e.g. deleting the reference to footnotes.xml or removing default entries - -- for image content types. - - -- [Content_Types].xml - let mkOverrideNode (part', contentType') = mknode "Override" - [("PartName",part'),("ContentType",contentType')] () - let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) - let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) - let overrides = map mkOverrideNode ( - [("/word/webSettings.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") - ,("/word/numbering.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml") - ,("/word/settings.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml") - ,("/word/theme/theme1.xml", - "application/vnd.openxmlformats-officedocument.theme+xml") - ,("/word/fontTable.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml") - ,("/docProps/app.xml", - "application/vnd.openxmlformats-officedocument.extended-properties+xml") - ,("/docProps/core.xml", - "application/vnd.openxmlformats-package.core-properties+xml") - ,("/word/styles.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml") - ,("/word/document.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") - ,("/word/footnotes.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") - ] ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, - "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, - "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ - map mkImageOverride imgs ++ - map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] - - let defaultnodes = [mknode "Default" - [("Extension","xml"),("ContentType","application/xml")] (), - mknode "Default" - [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] - let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides - let contentTypesEntry = toEntry "[Content_Types].xml" epochtime - $ renderXml contentTypesDoc - - -- word/_rels/document.xml.rels - let toBaseRel (url', id', target') = mknode "Relationship" - [("Type",url') - ,("Id",id') - ,("Target",target')] () - let baserels' = map toBaseRel - [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering", - "rId1", - "numbering.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles", - "rId2", - "styles.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings", - "rId3", - "settings.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings", - "rId4", - "webSettings.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable", - "rId5", - "fontTable.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme", - "rId6", - "theme/theme1.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", - "rId7", - "footnotes.xml") - ] - - let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) - let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers - let renumFooters = renumIds (\q -> qName q == "Id") idMap footers - let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () - let imgrels = map toImgRel imgs - let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () - let linkrels = map toLinkRel $ M.toList $ stExternalLinks st - let reldoc = mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] $ baserels ++ imgrels ++ linkrels - let relEntry = toEntry "word/_rels/document.xml.rels" epochtime - $ renderXml reldoc - - - -- adjust contents to add sectPr from reference.docx - let sectpr = case mbsectpr of - Just sectpr' -> let cs = renumIds - (\q -> qName q == "id" && qPrefix q == Just "r") - idMap - (elChildren sectpr') - in - add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs - Nothing -> (mknode "w:sectPr" [] ()) - - -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' - let contents' = contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] contents' - - - - -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime - $ renderXml docContents - - -- footnotes - let notes = mknode "w:footnotes" stdAttributes footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes - - -- footnote rels - let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime - $ renderXml $ mknode "Relationships" [("xmlns","http://schemas.openxmlformats.org/package/2006/relationships")] - linkrels - - -- styles - - -- We only want to inject paragraph and text properties that - -- are not already in the style map. Note that keys in the stylemap - -- are normalized as lowercase. - let newDynamicParaProps = filter - (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sParaStyleMap styleMaps) - (stDynamicParaProps st) - - newDynamicTextProps = filter - (\sty -> isNothing $ M.lookup (toLower <$> sty) $ getMap $ sCharStyleMap styleMaps) - (stDynamicTextProps st) - - let newstyles = map newParaPropToOpenXml newDynamicParaProps ++ - map newTextPropToOpenXml newDynamicTextProps ++ - (case writerHighlightStyle opts of - Nothing -> [] - Just sty -> (styleToOpenXml styleMaps sty)) - let styledoc' = styledoc{ elContent = elContent styledoc ++ - map Elem newstyles } - let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' - - -- construct word/numbering.xml - let numpath = "word/numbering.xml" - numbering <- parseXml refArchive distArchive numpath - newNumElts <- mkNumbering (stLists st) - let allElts = onlyElems (elContent numbering) ++ newNumElts - let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = - -- we want all the abstractNums first, then the nums, - -- otherwise things break: - [Elem e | e <- allElts - , qName (elName e) == "abstractNum" ] ++ - [Elem e | e <- allElts - , qName (elName e) == "num" ] } - let docPropsPath = "docProps/core.xml" - let docProps = mknode "cp:coreProperties" - [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") - ,("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:dcterms","http://purl.org/dc/terms/") - ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") - ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] - $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) - : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) - let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps - - let relsPath = "_rels/.rels" - let rels = mknode "Relationships" [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] - $ map (\attrs -> mknode "Relationship" attrs ()) - [ [("Id","rId1") - ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") - ,("Target","word/document.xml")] - , [("Id","rId4") - ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties") - ,("Target","docProps/app.xml")] - , [("Id","rId3") - ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties") - ,("Target","docProps/core.xml")] - ] - let relsEntry = toEntry relsPath epochtime $ renderXml rels - - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - -- we do, however, copy some settings over from reference - let settingsPath = "word/settings.xml" - settingsList = [ "w:autoHyphenation" - , "w:consecutiveHyphenLimit" - , "w:hyphenationZone" - , "w:doNotHyphenateCap" - ] - settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList - - let entryFromArchive arch path = - maybe (fail $ path ++ " missing in reference docx") - return - (findEntryByPath path arch `mplus` findEntryByPath path distArchive) - docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" - themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" - headerFooterEntries <- mapM (entryFromArchive refArchive) $ - mapMaybe (fmap ("word/" ++) . extractTarget) - (headers ++ footers) - let miscRelEntries = [ e | e <- zEntries refArchive - , "word/_rels/" `isPrefixOf` (eRelativePath e) - , ".xml.rels" `isSuffixOf` (eRelativePath e) - , eRelativePath e /= "word/_rels/document.xml.rels" - , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] - let otherMediaEntries = [ e | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] - - -- Create archive - let archive = foldr addEntryToArchive emptyArchive $ - contentTypesEntry : relsEntry : contentEntry : relEntry : - footnoteRelEntry : numEntry : styleEntry : footnotesEntry : - docPropsEntry : docPropsAppEntry : themeEntry : - fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries ++ headerFooterEntries ++ - miscRelEntries ++ otherMediaEntries - return $ fromArchive archive - - -newParaPropToOpenXml :: String -> Element -newParaPropToOpenXml s = - let styleId = filter (not . isSpace) s - in mknode "w:style" [ ("w:type", "paragraph") - , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () - , mknode "w:basedOn" [("w:val","BodyText")] () - , mknode "w:qFormat" [] () - ] - -newTextPropToOpenXml :: String -> Element -newTextPropToOpenXml s = - let styleId = filter (not . isSpace) s - in mknode "w:style" [ ("w:type", "character") - , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () - , mknode "w:basedOn" [("w:val","BodyTextChar")] () - ] - -styleToOpenXml :: StyleMaps -> Style -> [Element] -styleToOpenXml sm style = - maybeToList parStyle ++ mapMaybe toStyle alltoktypes - where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing - | otherwise = Just $ - mknode "w:style" [("w:type","character"), - ("w:customStyle","1"),("w:styleId",show toktype)] - [ mknode "w:name" [("w:val",show toktype)] () - , mknode "w:basedOn" [("w:val","VerbatimChar")] () - , mknode "w:rPr" [] $ - [ mknode "w:color" [("w:val",tokCol toktype)] () - | tokCol toktype /= "auto" ] ++ - [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () - | tokBg toktype /= "auto" ] ++ - [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ - [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ - [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ] - ] - tokStyles = tokenStyles style - tokFeature f toktype = maybe False f $ lookup toktype tokStyles - tokCol toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenColor =<< lookup toktype tokStyles) - `mplus` defaultColor style - tokBg toktype = maybe "auto" (drop 1 . fromColor) - $ (tokenBackground =<< lookup toktype tokStyles) - `mplus` backgroundColor style - parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing - | otherwise = Just $ - mknode "w:style" [("w:type","paragraph"), - ("w:customStyle","1"),("w:styleId","SourceCode")] - [ mknode "w:name" [("w:val","Source Code")] () - , mknode "w:basedOn" [("w:val","Normal")] () - , mknode "w:link" [("w:val","VerbatimChar")] () - , mknode "w:pPr" [] - $ mknode "w:wordWrap" [("w:val","off")] () - : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) - $ backgroundColor style ) - ] - -copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry -copyChildren refArchive distArchive path timestamp elNames = do - ref <- parseXml refArchive distArchive path - dist <- parseXml distArchive distArchive path - return $ toEntry path timestamp $ renderXml dist{ - elContent = elContent dist ++ copyContent ref - } - where - strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p++":"++name - | otherwise = name - shouldCopy = (`elem` elNames) . strName - cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} - copyContent = map cleanElem . filterChildrenName shouldCopy - --- this is the lowest number used for a list numId -baseListId :: Int -baseListId = 1000 - -mkNumbering :: (PandocMonad m) => [ListMarker] -> m [Element] -mkNumbering lists = do - elts <- mapM mkAbstractNum (ordNub lists) - return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] - -mkNum :: ListMarker -> Int -> Element -mkNum marker numid = - mknode "w:num" [("w:numId",show numid)] - $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () - : case marker of - NoMarker -> [] - BulletMarker -> [] - NumberMarker _ _ start -> - map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - -mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element -mkAbstractNum marker = do - gen <- P.newStdGen - let (nsid, _) = randomR (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) gen - return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () - : mknode "w:multiLevelType" [("w:val","multilevel")] () - : map (mkLvl marker) [0..6] - -mkLvl :: ListMarker -> Int -> Element -mkLvl marker lvl = - mknode "w:lvl" [("w:ilvl",show lvl)] $ - [ mknode "w:start" [("w:val",start)] () - | marker /= NoMarker && marker /= BulletMarker ] ++ - [ mknode "w:numFmt" [("w:val",fmt)] () - , mknode "w:lvlText" [("w:val",lvltxt)] () - , mknode "w:lvlJc" [("w:val","left")] () - , mknode "w:pPr" [] - [ mknode "w:tabs" [] - $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] () - , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] () - ] - ] - where (fmt, lvltxt, start) = - case marker of - NoMarker -> ("bullet"," ","1") - BulletMarker -> ("bullet",bulletFor lvl,"1") - NumberMarker st de n -> (styleFor st lvl - ,patternFor de ("%" ++ show (lvl + 1)) - ,show n) - step = 720 - hang = 480 - bulletFor 0 = "\x2022" -- filled circle - bulletFor 1 = "\x2013" -- en dash - bulletFor 2 = "\x2022" -- hyphen bullet - bulletFor 3 = "\x2013" - bulletFor 4 = "\x2022" - bulletFor 5 = "\x2013" - bulletFor _ = "\x2022" - styleFor UpperAlpha _ = "upperLetter" - styleFor LowerAlpha _ = "lowerLetter" - styleFor UpperRoman _ = "upperRoman" - styleFor LowerRoman _ = "lowerRoman" - styleFor Decimal _ = "decimal" - styleFor DefaultStyle 1 = "decimal" - styleFor DefaultStyle 2 = "lowerLetter" - styleFor DefaultStyle 3 = "lowerRoman" - styleFor DefaultStyle 4 = "decimal" - styleFor DefaultStyle 5 = "lowerLetter" - styleFor DefaultStyle 6 = "lowerRoman" - styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" - patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." - -getNumId :: (PandocMonad m) => WS m Int -getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists - - -makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] -makeTOC opts | writerTableOfContents opts = do - let depth = "1-"++(show (writerTOCDepth opts)) - let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" - tocTitle <- gets stTocTitle - title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) - return $ - [mknode "w:sdt" [] ([ - mknode "w:sdtPr" [] ( - mknode "w:docPartObj" [] ( - [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), - mknode "w:docPartUnique" [] ()] - ) -- w:docPartObj - ), -- w:sdtPr - mknode "w:sdtContent" [] (title++[ - mknode "w:p" [] ( - mknode "w:r" [] ([ - mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), - mknode "w:instrText" [("xml:space","preserve")] tocCmd, - mknode "w:fldChar" [("w:fldCharType","separate")] (), - mknode "w:fldChar" [("w:fldCharType","end")] () - ]) -- w:r - ) -- w:p - ]) - ])] -- w:sdt -makeTOC _ = return [] - - --- | Convert Pandoc document to two lists of --- OpenXML elements (the main document and footnotes). -writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element]) -writeOpenXML opts (Pandoc meta blocks) = do - let tit = docTitle meta ++ case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> LineBreak : xs - _ -> [] - let auths = docAuthors meta - let dat = docDate meta - let abstract' = case lookupMeta "abstract" meta of - Just (MetaBlocks bs) -> bs - Just (MetaInlines ils) -> [Plain ils] - _ -> [] - let subtitle' = case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> xs - Just (MetaBlocks [Para xs]) -> xs - Just (MetaInlines xs) -> xs - _ -> [] - title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ - map Para auths - date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] - abstract <- if null abstract' - then return [] - else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' - let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs - convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs - convertSpace xs = xs - let blocks' = bottomUp convertSpace blocks - doc' <- (setFirstPara >> blocksToOpenXML opts blocks') - notes' <- reverse `fmap` gets stFootnotes - toc <- makeTOC opts - let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc - return (meta' ++ doc', notes') - --- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] -blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls - -pCustomStyle :: String -> Element -pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () - -pStyleM :: (PandocMonad m) => String -> WS m XML.Element -pStyleM styleName = do - styleMaps <- gets stStyleMaps - let sty' = getStyleId styleName $ sParaStyleMap styleMaps - return $ mknode "w:pStyle" [("w:val",sty')] () - -rCustomStyle :: String -> Element -rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () - -rStyleM :: (PandocMonad m) => String -> WS m XML.Element -rStyleM styleName = do - styleMaps <- gets stStyleMaps - let sty' = getStyleId styleName $ sCharStyleMap styleMaps - return $ mknode "w:rStyle" [("w:val",sty')] () - -getUniqueId :: (PandocMonad m) => m String --- the + 20 is to ensure that there are no clashes with the rIds --- already in word/document.xml.rel -getUniqueId = (show . (+ 20)) <$> P.newUniqueHash - --- | Key for specifying user-defined docx styles. -dynamicStyleKey :: String -dynamicStyleKey = "custom-style" - --- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] -blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk - -blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] -blockToOpenXML' _ Null = return [] -blockToOpenXML' opts (Div (ident,classes,kvs) bs) - | Just sty <- lookup dynamicStyleKey kvs = do - modify $ \s -> s{stDynamicParaProps = sty : (stDynamicParaProps s)} - withParaPropM (pStyleM sty) $ blocksToOpenXML opts bs - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - blockToOpenXML opts (Div (ident,classes,kvs') bs) -blockToOpenXML' opts (Div (_,["references"],_) bs) = do - let (hs, bs') = span isHeaderBlock bs - header <- blocksToOpenXML opts hs - -- We put the Bibliography style on paragraphs after the header - rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' - return (header ++ rest) -blockToOpenXML' opts (Div _ bs) = blocksToOpenXML opts bs -blockToOpenXML' opts (Header lev (ident,_,_) lst) = do - setFirstPara - paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ - getParaProps False - contents <- inlinesToOpenXML opts lst - usedIdents <- gets stSectionIds - let bookmarkName = if null ident - then uniqueIdent lst usedIdents - else ident - modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } - id' <- (lift . lift) getUniqueId - let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name",bookmarkName)] () - let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML' opts (Plain lst) = withParaProp (pCustomStyle "Compact") - $ blockToOpenXML opts (Para lst) --- title beginning with fig: indicates that the image is a figure -blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do - setFirstPara - let prop = pCustomStyle $ - if null alt - then "Figure" - else "FigureWithCaption" - paraProps <- local (\env -> env { envParaProperties = prop : envParaProperties env }) (getParaProps False) - contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] - captionNode <- withParaProp (pCustomStyle "ImageCaption") - $ blockToOpenXML opts (Para alt) - return $ mknode "w:p" [] (paraProps ++ contents) : captionNode --- fixDisplayMath sometimes produces a Para [] as artifact -blockToOpenXML' _ (Para []) = return [] -blockToOpenXML' opts (Para lst) = do - isFirstPara <- gets stFirstPara - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - bodyTextStyle <- pStyleM "Body Text" - let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] - [] -> [mknode "w:pPr" [] [bodyTextStyle]] - ps -> ps - modify $ \s -> s { stFirstPara = False } - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] -blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns -blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = do - report $ BlockNotRendered b - return [] -blockToOpenXML' opts (BlockQuote blocks) = do - p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks - setFirstPara - return p -blockToOpenXML' opts (CodeBlock attrs str) = do - p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) - setFirstPara - return p -blockToOpenXML' _ HorizontalRule = do - setFirstPara - return [ - mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] - $ mknode "v:rect" [("style","width:0;height:1.5pt"), - ("o:hralign","center"), - ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table caption aligns widths headers rows) = do - setFirstPara - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaProp (pCustomStyle "TableCaption") - $ blockToOpenXML opts (Para caption) - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) - $ blocksToOpenXML opts cell - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - let borderProps = mknode "w:tcPr" [] - [ mknode "w:tcBorders" [] - $ mknode "w:bottom" [("w:val","single")] () - , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] [pCustomStyle "Compact"]]] - let mkcell border contents = mknode "w:tc" [] - $ [ borderProps | border ] ++ - if null contents - then emptyCell - else contents - let mkrow border cells = mknode "w:tr" [] $ - [mknode "w:trPr" [] [ - mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] - ++ map (mkcell border) cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let rowwidth = fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", show (floor (textwidth * w) :: Integer))] () - let hasHeader = not (all null headers) - return $ - caption' ++ - [mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","TableNormal")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : - mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow True headers' | hasHeader ] ++ - map (mkrow False) rows' - )] -blockToOpenXML' opts (BulletList lst) = do - let marker = BulletMarker - addList marker - numid <- getNumId - l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst - setFirstPara - return l -blockToOpenXML' opts (OrderedList (start, numstyle, numdelim) lst) = do - let marker = NumberMarker numstyle numdelim start - addList marker - numid <- getNumId - l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst - setFirstPara - return l -blockToOpenXML' opts (DefinitionList items) = do - l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items - setFirstPara - return l - -definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] -definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pCustomStyle "DefinitionTerm") - $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pCustomStyle "Definition") - $ concat `fmap` mapM (blocksToOpenXML opts) defs - return $ term' ++ defs' - -addList :: (PandocMonad m) => ListMarker -> WS m () -addList marker = do - lists <- gets stLists - modify $ \st -> st{ stLists = lists ++ [marker] } - -listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] -listItemToOpenXML _ _ [] = return [] -listItemToOpenXML opts numid (first:rest) = do - first' <- withNumId numid $ blockToOpenXML opts first - -- baseListId is the code for no list marker: - rest' <- withNumId baseListId $ blocksToOpenXML opts rest - return $ first' ++ rest' - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - --- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] -inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst - -withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a -withNumId numid = local $ \env -> env{ envListNumId = numid } - -asList :: (PandocMonad m) => WS m a -> WS m a -asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } - -getTextProps :: (PandocMonad m) => WS m [Element] -getTextProps = do - props <- asks envTextProperties - return $ if null props - then [] - else [mknode "w:rPr" [] props] - -withTextProp :: PandocMonad m => Element -> WS m a -> WS m a -withTextProp d p = - local (\env -> env {envTextProperties = d : envTextProperties env}) p - -withTextPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a -withTextPropM = (. flip withTextProp) . (>>=) - -getParaProps :: PandocMonad m => Bool -> WS m [Element] -getParaProps displayMathPara = do - props <- asks envParaProperties - listLevel <- asks envListLevel - numid <- asks envListNumId - let listPr = if listLevel >= 0 && not displayMathPara - then [ mknode "w:numPr" [] - [ mknode "w:numId" [("w:val",show numid)] () - , mknode "w:ilvl" [("w:val",show listLevel)] () ] - ] - else [] - return $ case props ++ listPr of - [] -> [] - ps -> [mknode "w:pPr" [] ps] - -withParaProp :: PandocMonad m => Element -> WS m a -> WS m a -withParaProp d p = - local (\env -> env {envParaProperties = d : envParaProperties env}) p - -withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a -withParaPropM = (. flip withParaProp) . (>>=) - -formattedString :: PandocMonad m => String -> WS m [Element] -formattedString str = do - props <- getTextProps - inDel <- asks envInDel - return [ mknode "w:r" [] $ - props ++ - [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] (stripInvalidChars str) ] ] - -setFirstPara :: PandocMonad m => WS m () -setFirstPara = modify $ \s -> s { stFirstPara = True } - --- | Convert an inline element to OpenXML. -inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] -inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il - -inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] -inlineToOpenXML' _ (Str str) = formattedString str -inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") -inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") -inlineToOpenXML' opts (Span (ident,classes,kvs) ils) - | Just sty <- lookup dynamicStyleKey kvs = do - let kvs' = filter ((dynamicStyleKey, sty)/=) kvs - modify $ \s -> s{stDynamicTextProps = sty : (stDynamicTextProps s)} - withTextProp (rCustomStyle sty) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "rtl" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "rtl")/=) kvs - local (\env -> env { envRTL = True }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | Just "ltr" <- lookup "dir" kvs = do - let kvs' = filter (("dir", "ltr")/=) kvs - local (\env -> env { envRTL = False }) $ - inlineToOpenXML opts (Span (ident,classes,kvs') ils) - | "insertion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - insId <- gets stInsId - modify $ \s -> s{stInsId = (insId + 1)} - x <- inlinesToOpenXML opts ils - return [ mknode "w:ins" [("w:id", (show insId)), - ("w:author", author), - ("w:date", date)] - x ] - | "deletion" `elem` classes = do - defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate - let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - delId <- gets stDelId - modify $ \s -> s{stDelId = (delId + 1)} - x <- local (\env -> env {envInDel = True}) (inlinesToOpenXML opts ils) - return [ mknode "w:del" [("w:id", (show delId)), - ("w:author", author), - ("w:date", date)] - x ] - | otherwise = do - let off x = withTextProp (mknode x [("w:val","0")] ()) - ((if "csl-no-emph" `elem` classes then off "w:i" else id) . - (if "csl-no-strong" `elem` classes then off "w:b" else id) . - (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) - $ inlinesToOpenXML opts ils -inlineToOpenXML' opts (Strong lst) = - withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML' opts (Emph lst) = - withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML' opts (Subscript lst) = - withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ()) - $ inlinesToOpenXML opts lst -inlineToOpenXML' opts (Superscript lst) = - withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ()) - $ inlinesToOpenXML opts lst -inlineToOpenXML' opts (SmallCaps lst) = - withTextProp (mknode "w:smallCaps" [] ()) - $ inlinesToOpenXML opts lst -inlineToOpenXML' opts (Strikeout lst) = - withTextProp (mknode "w:strike" [] ()) - $ inlinesToOpenXML opts lst -inlineToOpenXML' _ LineBreak = return [br] -inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = do - report $ InlineNotRendered il - return [] -inlineToOpenXML' opts (Quoted quoteType lst) = - inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] - where (open, close) = case quoteType of - SingleQuote -> ("\x2018", "\x2019") - DoubleQuote -> ("\x201C", "\x201D") -inlineToOpenXML' opts (Math mathType str) = do - when (mathType == DisplayMath) setFirstPara - res <- (lift . lift) (convertMath writeOMML mathType str) - case res of - Right r -> return [r] - Left il -> inlineToOpenXML' opts il -inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML' opts (Code attrs str) = do - let unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rCustomStyle (show toktype) ] - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] - withTextProp (rCustomStyle "VerbatimChar") - $ case writerHighlightStyle opts >> highlight formatOpenXML attrs str of - Just h -> return h - Nothing -> unhighlighted -inlineToOpenXML' opts (Note bs) = do - notes <- gets stFootnotes - notenum <- (lift . lift) getUniqueId - footnoteStyle <- rStyleM "Footnote Reference" - let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] footnoteStyle - , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker - let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs - insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs - insertNoteRef xs = Para [notemarkerXml] : xs - - contents <- local (\env -> env{ envListLevel = -1 - , envParaProperties = [] - , envTextProperties = [] }) - (withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts - $ insertNoteRef bs) - let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents - modify $ \s -> s{ stFootnotes = newnote : notes } - return [ mknode "w:r" [] - [ mknode "w:rPr" [] footnoteStyle - , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] --- internal link: -inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do - contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt - return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] --- external link: -inlineToOpenXML' opts (Link _ txt (src,_)) = do - contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt - extlinks <- gets stExternalLinks - id' <- case M.lookup src extlinks of - Just i -> return i - Nothing -> do - i <- ("rId"++) `fmap` ((lift . lift) getUniqueId) - modify $ \st -> st{ stExternalLinks = - M.insert src i extlinks } - return i - return [ mknode "w:hyperlink" [("r:id",id')] contents ] -inlineToOpenXML' opts (Image attr alt (src, title)) = do - -- first, check to see if we've already done this image - pageWidth <- asks envPrintWidth - imgs <- gets stImages - case M.lookup src imgs of - Just (_,_,_,elt,_) -> return [elt] - Nothing -> do - res <- runExceptT $ lift (P.fetchItem (writerSourceURL opts) src) - case res of - Left (_ :: PandocError) -> do - report $ CouldNotFetchResource src "" - -- emit alt text - inlinesToOpenXML opts alt - Right (img, mt) -> do - ident <- ("rId"++) `fmap` ((lift . lift) getUniqueId) - let (xpt,ypt) = desiredSizeInPoints opts attr - (either (const def) id (imageSize img)) - -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) - let cNvPicPr = mknode "pic:cNvPicPr" [] $ - mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () - let nvPicPr = mknode "pic:nvPicPr" [] - [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () - , cNvPicPr ] - let blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () - , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] - let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ] - let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ - mknode "a:avLst" [] () - let ln = mknode "a:ln" [("w","9525")] - [ mknode "a:noFill" [] () - , mknode "a:headEnd" [] () - , mknode "a:tailEnd" [] () ] - let spPr = mknode "pic:spPr" [("bwMode","auto")] - [xfrm, prstGeom, mknode "a:noFill" [] (), ln] - let graphic = mknode "a:graphic" [] $ - mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] - [ mknode "pic:pic" [] - [ nvPicPr - , blipFill - , spPr ] ] - let imgElt = mknode "w:r" [] $ - mknode "w:drawing" [] $ - mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () - , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () - , mknode "wp:docPr" [("descr",stringify alt), ("title", title), ("id","1"),("name","Picture")] () - , graphic ] - let imgext = case mt >>= extensionFromMimeType of - Just x -> '.':x - Nothing -> case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Just Eps -> ".eps" - Nothing -> "" - if null imgext - then -- without an extension there is no rule for content type - inlinesToOpenXML opts alt -- return alt to avoid corrupted docx - else do - let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = mt <|> getMimeType imgpath - -- insert mime type to use in constructing [Content_Types].xml - modify $ \st -> st{ stImages = - M.insert src (ident, imgpath, mbMimeType, imgElt, img) - $ stImages st } - return [imgElt] - -br :: Element -br = breakElement "textWrapping" - -breakElement :: String -> Element -breakElement kind = mknode "w:r" [] [mknode "w:br" [("w:type",kind)] () ] - --- Word will insert these footnotes into the settings.xml file --- (whether or not they're visible in the document). If they're in the --- file, but not in the footnotes.xml file, it will produce --- problems. So we want to make sure we insert them into our document. -defaultFootnotes :: [Element] -defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] $ - [ mknode "w:p" [] $ - [mknode "w:r" [] $ - [ mknode "w:separator" [] ()]]] - , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] $ - [ mknode "w:p" [] $ - [ mknode "w:r" [] $ - [ mknode "w:continuationSeparator" [] ()]]]] - -parseXml :: (PandocMonad m) => Archive -> Archive -> String -> m Element -parseXml refArchive distArchive relpath = - case findEntryByPath relpath refArchive `mplus` - findEntryByPath relpath distArchive of - Nothing -> fail $ relpath ++ " missing in reference docx" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Just d -> return d - --- | Scales the image to fit the page --- sizes are passed in emu -fitToPage :: (Double, Double) -> Integer -> (Integer, Integer) -fitToPage (x, y) pageWidth - -- Fixes width to the page width and scales the height - | x > fromIntegral pageWidth = - (pageWidth, floor $ ((fromIntegral pageWidth) / x) * y) - | otherwise = (floor x, floor y) - -withDirection :: PandocMonad m => WS m a -> WS m a -withDirection x = do - isRTL <- asks envRTL - paraProps <- asks envParaProperties - textProps <- asks envTextProperties - -- We want to clean all bidirection (bidi) and right-to-left (rtl) - -- properties from the props first. This is because we don't want - -- them to stack up. - let paraProps' = filter (\e -> (qName . elName) e /= "bidi") paraProps - textProps' = filter (\e -> (qName . elName) e /= "rtl") textProps - if isRTL - -- if we are going right-to-left, we (re?)add the properties. - then flip local x $ - \env -> env { envParaProperties = (mknode "w:bidi" [] ()) : paraProps' - , envTextProperties = (mknode "w:rtl" [] ()) : textProps' - } - else flip local x $ \env -> env { envParaProperties = paraProps' - , envTextProperties = textProps' - } diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs deleted file mode 100644 index 79a371d4d..000000000 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ /dev/null @@ -1,522 +0,0 @@ -{- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : Clare Macrae <clare.macrae@googlemail.com> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to DokuWiki markup. - -DokuWiki: <https://www.dokuwiki.org/dokuwiki> --} - -{- - [ ] Implement nested blockquotes (currently only ever does one level) - [x] Implement alignment of text in tables - [ ] Implement comments - [ ] Work through the Dokuwiki spec, and check I've not missed anything out - [ ] Remove dud/duplicate code --} - -module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions( - writerTableOfContents - , writerTemplate - , writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting - , camelCaseToHyphenated, trimr, substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intersect, intercalate, isPrefixOf, transpose ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) -import Control.Monad.Reader ( ReaderT, runReaderT, ask, local ) -import Text.Pandoc.Class (PandocMonad) - -data WriterState = WriterState { - stNotes :: Bool -- True if there are notes - } - -data WriterEnvironment = WriterEnvironment { - stIndent :: String -- Indent after the marker at the beginning of list items - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list - , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell) - } - -instance Default WriterState where - def = WriterState { stNotes = False } - -instance Default WriterEnvironment where - def = WriterEnvironment { stIndent = "" - , stUseTags = False - , stBackSlashLB = False } - -type DokuWiki = ReaderT WriterEnvironment (State WriterState) - --- | Convert Pandoc to DokuWiki. -writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeDokuWiki opts document = return $ - runDokuWiki (pandocToDokuWiki opts document) - -runDokuWiki :: DokuWiki a -> a -runDokuWiki = flip evalState def . flip runReaderT def - --- | Return DokuWiki representation of document. -pandocToDokuWiki :: WriterOptions -> Pandoc -> DokuWiki String -pandocToDokuWiki opts (Pandoc meta blocks) = do - metadata <- metaToJSON opts - (fmap trimr . blockListToDokuWiki opts) - (inlineListToDokuWiki opts) - meta - body <- blockListToDokuWiki opts blocks - notesExist <- stNotes <$> get - let notes = if notesExist - then "" -- TODO Was "\n<references />" Check whether I can really remove this: - -- if it is definitely to do with footnotes, can remove this whole bit - else "" - let main = body ++ notes - let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Escape special characters for DokuWiki. -escapeString :: String -> String -escapeString = substitute "__" "%%__%%" . - substitute "**" "%%**%%" . - substitute "//" "%%//%%" - --- | Convert Pandoc block element to DokuWiki. -blockToDokuWiki :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> DokuWiki String - -blockToDokuWiki _ Null = return "" - -blockToDokuWiki opts (Div _attrs bs) = do - contents <- blockListToDokuWiki opts bs - return $ contents ++ "\n" - -blockToDokuWiki opts (Plain inlines) = - inlineListToDokuWiki opts inlines - --- title beginning with fig: indicates that the image is a figure --- dokuwiki doesn't support captions - so combine together alt and caption into alt -blockToDokuWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return "" - else (" " ++) `fmap` inlineListToDokuWiki opts txt - let opt = if null txt - then "" - else "|" ++ if null tit then capt else tit ++ capt - -- Relative links fail isURI and receive a colon - prefix = if isURI src then "" else ":" - return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n" - -blockToDokuWiki opts (Para inlines) = do - indent <- stIndent <$> ask - useTags <- stUseTags <$> ask - contents <- inlineListToDokuWiki opts inlines - return $ if useTags - then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>" - else contents ++ if null indent then "\n" else "" - -blockToDokuWiki opts (LineBlock lns) = - blockToDokuWiki opts $ linesToPara lns - -blockToDokuWiki _ (RawBlock f str) - | f == Format "dokuwiki" = return str - -- See https://www.dokuwiki.org/wiki:syntax - -- use uppercase HTML tag for block-level content: - | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" - | otherwise = return "" - -blockToDokuWiki _ HorizontalRule = return "\n----\n" - -blockToDokuWiki opts (Header level _ inlines) = do - -- emphasis, links etc. not allowed in headers, apparently, - -- so we remove formatting: - contents <- inlineListToDokuWiki opts $ removeFormatting inlines - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" - -blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do - let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", - "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", - "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", - "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", - "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", - "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", - "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", - "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - return $ "<code" ++ - (case at of - [] -> ">\n" - (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" - -blockToDokuWiki opts (BlockQuote blocks) = do - contents <- blockListToDokuWiki opts blocks - if isSimpleBlockQuote blocks - then return $ unlines $ map ("> " ++) $ lines contents - else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" - -blockToDokuWiki opts (Table capt aligns _ headers rows) = do - captionDoc <- if null capt - then return "" - else do - c <- inlineListToDokuWiki opts capt - return $ "" ++ c ++ "\n" - headers' <- if all null headers - then return [] - else zipWithM (tableItemToDokuWiki opts) aligns headers - rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') - let padTo (width, al) s = - case (width - length s) of - x | x > 0 -> - if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' - else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' - | otherwise -> s - let renderRow sep cells = sep ++ - intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep - return $ captionDoc ++ - (if null headers' then "" else renderRow "^" headers' ++ "\n") ++ - unlines (map (renderRow "|") rows') - -blockToDokuWiki opts x@(BulletList items) = do - oldUseTags <- stUseTags <$> ask - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - contents <- local (\s -> s { stUseTags = True }) - (mapM (listItemToDokuWiki opts) items) - return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n" - else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " - , stBackSlashLB = backSlash}) - (mapM (listItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" - -blockToDokuWiki opts x@(OrderedList attribs items) = do - oldUseTags <- stUseTags <$> ask - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - contents <- local (\s -> s { stUseTags = True }) - (mapM (orderedListItemToDokuWiki opts) items) - return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n" - else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " - , stBackSlashLB = backSlash}) - (mapM (orderedListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" - --- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there --- is a specific representation of them. --- TODO This creates double '; ; ' if there is a bullet or ordered list inside a definition list -blockToDokuWiki opts x@(DefinitionList items) = do - oldUseTags <- stUseTags <$> ask - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - contents <- local (\s -> s { stUseTags = True }) - (mapM (definitionListItemToDokuWiki opts) items) - return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n" - else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " - , stBackSlashLB = backSlash}) - (mapM (definitionListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" - --- Auxiliary functions for lists: - --- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle - in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ - (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" - else "") - --- | Convert bullet list item (list of blocks) to DokuWiki. -listItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String -listItemToDokuWiki opts items = do - contents <- blockListToDokuWiki opts items - useTags <- stUseTags <$> ask - if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" - else do - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent - return $ indent' ++ "* " ++ contents - --- | Convert ordered list item (list of blocks) to DokuWiki. --- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki -orderedListItemToDokuWiki :: WriterOptions -> [Block] -> DokuWiki String -orderedListItemToDokuWiki opts items = do - contents <- blockListToDokuWiki opts items - useTags <- stUseTags <$> ask - if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" - else do - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent - return $ indent' ++ "- " ++ contents - --- | Convert definition list item (label, list of blocks) to DokuWiki. -definitionListItemToDokuWiki :: WriterOptions - -> ([Inline],[[Block]]) - -> DokuWiki String -definitionListItemToDokuWiki opts (label, items) = do - labelText <- inlineListToDokuWiki opts label - contents <- mapM (blockListToDokuWiki opts) items - useTags <- stUseTags <$> ask - if useTags - then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ - (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) - else do - indent <- stIndent <$> ask - backSlash <- stBackSlashLB <$> ask - let indent' = if backSlash then (drop 2 indent) else indent - return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents - --- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. -isSimpleList :: Block -> Bool -isSimpleList x = - case x of - BulletList items -> all isSimpleListItem items - OrderedList (num, sty, _) items -> all isSimpleListItem items && - num == 1 && sty `elem` [DefaultStyle, Decimal] - DefinitionList items -> all isSimpleListItem $ concatMap snd items - _ -> False - --- | True if list item can be handled with the simple wiki syntax. False if --- HTML tags will be needed. -isSimpleListItem :: [Block] -> Bool -isSimpleListItem [] = True -isSimpleListItem [x] = - case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False -isSimpleListItem [x, y] | isPlainOrPara x = - case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False -isSimpleListItem _ = False - -isPlainOrPara :: Block -> Bool -isPlainOrPara (Plain _) = True -isPlainOrPara (Para _) = True -isPlainOrPara _ = False - -isSimpleBlockQuote :: [Block] -> Bool -isSimpleBlockQuote bs = all isPlainOrPara bs - --- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" - -backSlashLineBreaks :: String -> String -backSlashLineBreaks cs = reverse $ g $ reverse $ concatMap f cs - where f '\n' = "\\\\ " - f c = [c] - g (' ' : '\\':'\\': xs) = xs - g s = s - --- Auxiliary functions for tables: - -tableItemToDokuWiki :: WriterOptions - -> Alignment - -> [Block] - -> DokuWiki String -tableItemToDokuWiki opts align' item = do - let mkcell x = (if align' == AlignRight || align' == AlignCenter - then " " - else "") ++ x ++ - (if align' == AlignLeft || align' == AlignCenter - then " " - else "") - contents <- local (\s -> s { stBackSlashLB = True }) $ - blockListToDokuWiki opts item - return $ mkcell contents - --- | Convert list of Pandoc block elements to DokuWiki. -blockListToDokuWiki :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> DokuWiki String -blockListToDokuWiki opts blocks = do - backSlash <- stBackSlashLB <$> ask - let blocks' = consolidateRawBlocks blocks - if backSlash - then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks' - else vcat <$> mapM (blockToDokuWiki opts) blocks' - -consolidateRawBlocks :: [Block] -> [Block] -consolidateRawBlocks [] = [] -consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) - | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) -consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs - --- | Convert list of Pandoc inline elements to DokuWiki. -inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String -inlineListToDokuWiki opts lst = - concat <$> (mapM (inlineToDokuWiki opts) lst) - --- | Convert Pandoc inline element to DokuWiki. -inlineToDokuWiki :: WriterOptions -> Inline -> DokuWiki String - -inlineToDokuWiki opts (Span _attrs ils) = - inlineListToDokuWiki opts ils - -inlineToDokuWiki opts (Emph lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "//" ++ contents ++ "//" - -inlineToDokuWiki opts (Strong lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "**" ++ contents ++ "**" - -inlineToDokuWiki opts (Strikeout lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "<del>" ++ contents ++ "</del>" - -inlineToDokuWiki opts (Superscript lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "<sup>" ++ contents ++ "</sup>" - -inlineToDokuWiki opts (Subscript lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "<sub>" ++ contents ++ "</sub>" - -inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst - -inlineToDokuWiki opts (Quoted SingleQuote lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "\8216" ++ contents ++ "\8217" - -inlineToDokuWiki opts (Quoted DoubleQuote lst) = do - contents <- inlineListToDokuWiki opts lst - return $ "\8220" ++ contents ++ "\8221" - -inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst - -inlineToDokuWiki _ (Code _ str) = - -- In dokuwiki, text surrounded by '' is really just a font statement, i.e. <tt>, - -- and so other formatting can be present inside. - -- However, in pandoc, and markdown, inlined code doesn't contain formatting. - -- So I have opted for using %% to disable all formatting inside inline code blocks. - -- This gives the best results when converting from other formats to dokuwiki, even if - -- the resultand code is a little ugly, for short strings that don't contain formatting - -- characters. - -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format, - -- any formatting inside inlined code blocks would be lost, or presented incorrectly. - return $ "''%%" ++ str ++ "%%''" - -inlineToDokuWiki _ (Str str) = return $ escapeString str - -inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim - -- note: str should NOT be escaped - where delim = case mathType of - DisplayMath -> "$$" - InlineMath -> "$" - -inlineToDokuWiki _ (RawInline f str) - | f == Format "dokuwiki" = return str - | f == Format "html" = return $ "<html>" ++ str ++ "</html>" - | otherwise = return "" - -inlineToDokuWiki _ LineBreak = return "\\\\\n" - -inlineToDokuWiki opts SoftBreak = - case writerWrapText opts of - WrapNone -> return " " - WrapAuto -> return " " - WrapPreserve -> return "\n" - -inlineToDokuWiki _ Space = return " " - -inlineToDokuWiki opts (Link _ txt (src, _)) = do - label <- inlineListToDokuWiki opts txt - case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" - | escapeURI s == src -> return src - _ -> if isURI src - then return $ "[[" ++ src ++ "|" ++ label ++ "]]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page -inlineToDokuWiki opts (Image attr alt (source, tit)) = do - alt' <- inlineListToDokuWiki opts alt - let txt = case (tit, alt) of - ("", []) -> "" - ("", _ ) -> "|" ++ alt' - (_ , _ ) -> "|" ++ tit - -- Relative links fail isURI and receive a colon - prefix = if isURI source then "" else ":" - return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}" - -inlineToDokuWiki opts (Note contents) = do - contents' <- blockListToDokuWiki opts contents - modify (\s -> s { stNotes = True }) - return $ "((" ++ contents' ++ "))" - -- note - may not work for notes with multiple blocks - -imageDims :: WriterOptions -> Attr -> String -imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) - where - toPx = fmap (showInPixel opts) . checkPct - checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h - go Nothing Nothing = "" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs deleted file mode 100644 index 247014c20..000000000 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ /dev/null @@ -1,1257 +0,0 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} -{- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to EPUB. --} -module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where -import Text.Pandoc.Logging -import qualified Data.Map as M -import qualified Data.Set as Set -import Data.Maybe ( fromMaybe, catMaybes ) -import Data.List ( isPrefixOf, isInfixOf, intercalate ) -import Text.Printf (printf) -import System.FilePath ( takeExtension, takeFileName ) -import Network.HTTP ( urlEncode ) -import qualified Data.ByteString.Lazy as B -import qualified Data.ByteString.Lazy.Char8 as B8 -import qualified Text.Pandoc.UTF8 as UTF8 -import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Text.Pandoc.Compat.Time -import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim - , normalizeDate, stringify - , hierarchicalize ) -import qualified Text.Pandoc.Shared as S (Element(..)) -import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options ( WriterOptions(..) - , WrapOption(..) - , HTMLMathMethod(..) - , EPUBVersion(..) - , ObfuscationMethod(NoObfuscation) ) -import Text.Pandoc.Definition -import Text.Pandoc.Walk (walk, walkM, query) -import Text.Pandoc.UUID (getUUID) -import Control.Monad.State (modify, get, gets, State, StateT, put, evalState, evalStateT, lift) -import Control.Monad (mplus, when, zipWithM) -import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs - , strContent, lookupAttr, Node(..), QName(..), parseXML - , onlyElems, node, ppElement) -import Text.Pandoc.Writers.HTML ( writeHtmlStringForEPUB ) -import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -import Control.Monad.Except (throwError, catchError) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import qualified Text.Pandoc.Class as P - --- A Chapter includes a list of blocks and maybe a section --- number offset. Note, some chapters are unnumbered. The section --- number is different from the index number, which will be used --- in filenames, chapter0003.xhtml. -data Chapter = Chapter (Maybe [Int]) [Block] - -data EPUBState = EPUBState { - stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] - } - -type E m = StateT EPUBState m - -data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheet :: Maybe Stylesheet - , epubPageDirection :: Maybe ProgressionDirection - } deriving Show - -data Stylesheet = StylesheetPath FilePath - | StylesheetContents String - deriving Show - -data Date = Date{ - dateText :: String - , dateEvent :: Maybe String - } deriving Show - -data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String - } deriving Show - -data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String - } deriving Show - -data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String - } deriving Show - -data ProgressionDirection = LTR | RTL deriving Show - -dcName :: String -> QName -dcName n = QName n Nothing (Just "dc") - -dcNode :: Node t => String -> t -> Element -dcNode = node . dcName - -opfName :: String -> QName -opfName n = QName n Nothing (Just "opf") - -toId :: FilePath -> String -toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' - then x - else '_') . takeFileName - -removeNote :: Inline -> Inline -removeNote (Note _) = Str "" -removeNote x = x - -getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata -getEPUBMetadata opts meta = do - let md = metadataFromMeta opts meta - let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts - let md' = foldr addMetadataFromXML md elts - let addIdentifier m = - if null (epubIdentifier m) - then do - randomId <- (show . getUUID) <$> lift P.newStdGen - return $ m{ epubIdentifier = [Identifier randomId Nothing] } - else return m - let addLanguage m = - if null (epubLanguage m) - then case lookup "lang" (writerVariables opts) of - Just x -> return m{ epubLanguage = x } - Nothing -> do - mLang <- lift $ P.lookupEnv "LANG" - let localeLang = - case mLang of - Just lang -> - map (\c -> if c == '_' then '-' else c) $ - takeWhile (/='.') lang - Nothing -> "en-US" - return m{ epubLanguage = localeLang } - else return m - let fixDate m = - if null (epubDate m) - then do - currentTime <- lift P.getCurrentTime - return $ m{ epubDate = [ Date{ - dateText = showDateTimeISO8601 currentTime - , dateEvent = Nothing } ] } - else return m - let addAuthor m = - if any (\c -> creatorRole c == Just "aut") $ epubCreator m - then return m - else do - let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = name - , creatorRole = Just "aut" - , creatorFileAs = Nothing } - return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } - addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage - -addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata -addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md - | name == "identifier" = md{ epubIdentifier = - Identifier{ identifierText = strContent e - , identifierScheme = lookupAttr (opfName "scheme") attrs - } : epubIdentifier md } - | name == "title" = md{ epubTitle = - Title{ titleText = strContent e - , titleFileAs = getAttr "file-as" - , titleType = getAttr "type" - } : epubTitle md } - | name == "date" = md{ epubDate = - Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e - , dateEvent = getAttr "event" - } : epubDate md } - | name == "language" = md{ epubLanguage = strContent e } - | name == "creator" = md{ epubCreator = - Creator{ creatorText = strContent e - , creatorRole = getAttr "role" - , creatorFileAs = getAttr "file-as" - } : epubCreator md } - | name == "contributor" = md{ epubContributor = - Creator { creatorText = strContent e - , creatorRole = getAttr "role" - , creatorFileAs = getAttr "file-as" - } : epubContributor md } - | name == "subject" = md{ epubSubject = strContent e : epubSubject md } - | name == "description" = md { epubDescription = Just $ strContent e } - | name == "type" = md { epubType = Just $ strContent e } - | name == "format" = md { epubFormat = Just $ strContent e } - | name == "type" = md { epubType = Just $ strContent e } - | name == "publisher" = md { epubPublisher = Just $ strContent e } - | name == "source" = md { epubSource = Just $ strContent e } - | name == "relation" = md { epubRelation = Just $ strContent e } - | name == "coverage" = md { epubCoverage = Just $ strContent e } - | name == "rights" = md { epubRights = Just $ strContent e } - | otherwise = md - where getAttr n = lookupAttr (opfName n) attrs -addMetadataFromXML _ md = md - -metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs -metaValueToString (MetaBool True) = "true" -metaValueToString (MetaBool False) = "false" -metaValueToString _ = "" - -getList :: String -> Meta -> (MetaValue -> a) -> [a] -getList s meta handleMetaValue = - case lookupMeta s meta of - Just (MetaList xs) -> map handleMetaValue xs - Just mv -> [handleMetaValue mv] - Nothing -> [] - -getIdentifier :: Meta -> [Identifier] -getIdentifier meta = getList "identifier" meta handleMetaValue - where handleMetaValue (MetaMap m) = - Identifier{ identifierText = maybe "" metaValueToString - $ M.lookup "text" m - , identifierScheme = metaValueToString <$> - M.lookup "scheme" m } - handleMetaValue mv = Identifier (metaValueToString mv) Nothing - -getTitle :: Meta -> [Title] -getTitle meta = getList "title" meta handleMetaValue - where handleMetaValue (MetaMap m) = - Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m - , titleFileAs = metaValueToString <$> M.lookup "file-as" m - , titleType = metaValueToString <$> M.lookup "type" m } - handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing - -getCreator :: String -> Meta -> [Creator] -getCreator s meta = getList s meta handleMetaValue - where handleMetaValue (MetaMap m) = - Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m - , creatorFileAs = metaValueToString <$> M.lookup "file-as" m - , creatorRole = metaValueToString <$> M.lookup "role" m } - handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing - -getDate :: String -> Meta -> [Date] -getDate s meta = getList s meta handleMetaValue - where handleMetaValue (MetaMap m) = - Date{ dateText = maybe "" id $ - M.lookup "text" m >>= normalizeDate' . metaValueToString - , dateEvent = metaValueToString <$> M.lookup "event" m } - handleMetaValue mv = Date { dateText = maybe "" - id $ normalizeDate' $ metaValueToString mv - , dateEvent = Nothing } - -simpleList :: String -> Meta -> [String] -simpleList s meta = - case lookupMeta s meta of - Just (MetaList xs) -> map metaValueToString xs - Just x -> [metaValueToString x] - Nothing -> [] - -metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata -metadataFromMeta opts meta = EPUBMetadata{ - epubIdentifier = identifiers - , epubTitle = titles - , epubDate = date - , epubLanguage = language - , epubCreator = creators - , epubContributor = contributors - , epubSubject = subjects - , epubDescription = description - , epubType = epubtype - , epubFormat = format - , epubPublisher = publisher - , epubSource = source - , epubRelation = relation - , epubCoverage = coverage - , epubRights = rights - , epubCoverImage = coverImage - , epubStylesheet = stylesheet - , epubPageDirection = pageDirection - } - where identifiers = getIdentifier meta - titles = getTitle meta - date = getDate "date" meta - language = maybe "" metaValueToString $ - lookupMeta "language" meta `mplus` lookupMeta "lang" meta - creators = getCreator "creator" meta - contributors = getCreator "contributor" meta - subjects = simpleList "subject" meta - description = metaValueToString <$> lookupMeta "description" meta - epubtype = metaValueToString <$> lookupMeta "type" meta - format = metaValueToString <$> lookupMeta "format" meta - publisher = metaValueToString <$> lookupMeta "publisher" meta - source = metaValueToString <$> lookupMeta "source" meta - relation = metaValueToString <$> lookupMeta "relation" meta - coverage = metaValueToString <$> lookupMeta "coverage" meta - rights = metaValueToString <$> lookupMeta "rights" meta - coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` - (metaValueToString <$> lookupMeta "cover-image" meta) - stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` - ((StylesheetPath . metaValueToString) <$> - lookupMeta "stylesheet" meta) - pageDirection = case map toLower . metaValueToString <$> - lookupMeta "page-progression-direction" meta of - Just "ltr" -> Just LTR - Just "rtl" -> Just RTL - _ -> Nothing - --- | Produce an EPUB2 file from a Pandoc document. -writeEPUB2 :: PandocMonad m - => WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> m B.ByteString -writeEPUB2 = writeEPUB EPUB2 - --- | Produce an EPUB3 file from a Pandoc document. -writeEPUB3 :: PandocMonad m - => WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> m B.ByteString -writeEPUB3 = writeEPUB EPUB3 - --- | Produce an EPUB file from a Pandoc document. -writeEPUB :: PandocMonad m - => EPUBVersion - -> WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> m B.ByteString -writeEPUB epubVersion opts doc = - let initState = EPUBState { stMediaPaths = [] - } - in - evalStateT (pandocToEPUB epubVersion opts doc) - initState - -pandocToEPUB :: PandocMonad m - => EPUBVersion - -> WriterOptions - -> Pandoc - -> E m B.ByteString -pandocToEPUB version opts doc@(Pandoc meta _) = do - let epub3 = version == EPUB3 - let writeHtml o = fmap UTF8.fromStringLazy . - writeHtmlStringForEPUB version o - epochtime <- floor <$> lift P.getPOSIXTime - let mkEntry path content = toEntry path epochtime content - let vars = ("epub3", if epub3 then "true" else "false") - : ("css", "stylesheet.css") - : writerVariables opts - let opts' = opts{ writerEmailObfuscation = NoObfuscation - , writerSectionDivs = True - , writerVariables = vars - , writerHTMLMathMethod = - if epub3 - then MathML - else writerHTMLMathMethod opts - , writerWrapText = WrapAuto } - metadata <- getEPUBMetadata opts' meta - - -- cover page - (cpgEntry, cpicEntry) <- - case epubCoverImage metadata of - Nothing -> return ([],[]) - Just img -> do - let coverImage = "media/" ++ takeFileName img - cpContent <- lift $ writeHtml - opts'{ writerVariables = ("coverpage","true"):vars } - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) - imgContent <- lift $ P.readFileLazy img - return ( [mkEntry "cover.xhtml" cpContent] - , [mkEntry coverImage imgContent] ) - - -- title page - tpContent <- lift $ writeHtml opts'{ - writerVariables = ("titlepage","true"):vars } - (Pandoc meta []) - let tpEntry = mkEntry "title_page.xhtml" tpContent - - -- handle pictures - -- mediaRef <- P.newIORef [] - Pandoc _ blocks <- walkM (transformInline opts') doc >>= - walkM (transformBlock opts') - picEntries <- (catMaybes . map (snd . snd)) <$> (gets stMediaPaths) - -- handle fonts - let matchingGlob f = do - xs <- lift $ P.glob f - when (null xs) $ - report $ CouldNotFetchResource f "glob did not match any font files" - return xs - let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f) - fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') - fontEntries <- mapM mkFontEntry fontFiles - - -- set page progression direction attribution - let progressionDirection = case epubPageDirection metadata of - Just LTR | epub3 -> - [("page-progression-direction", "ltr")] - Just RTL | epub3 -> - [("page-progression-direction", "rtl")] - _ -> [] - - -- body pages - - -- add level 1 header to beginning if none there - let blocks' = addIdentifiers - $ case blocks of - (Header 1 _ _ : _) -> blocks - _ -> Header 1 ("",["unnumbered"],[]) - (docTitle' meta) : blocks - - let chapterHeaderLevel = writerEpubChapterLevel opts - - let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel - isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) = - n <= chapterHeaderLevel - isChapterHeader _ = False - - let toChapters :: [Block] -> State [Int] [Chapter] - toChapters [] = return [] - toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) = - toChapters (bs ++ rest) - toChapters (Header n attr@(_,classes,_) ils : bs) = do - nums <- get - mbnum <- if "unnumbered" `elem` classes - then return Nothing - else case splitAt (n - 1) nums of - (ks, (m:_)) -> do - let nums' = ks ++ [m+1] - put nums' - return $ Just (ks ++ [m]) - -- note, this is the offset not the sec number - (ks, []) -> do - let nums' = ks ++ [1] - put nums' - return $ Just ks - let (xs,ys) = break isChapterHeader bs - (Chapter mbnum (Header n attr ils : xs) :) `fmap` toChapters ys - toChapters (b:bs) = do - let (xs,ys) = break isChapterHeader bs - (Chapter Nothing (b:xs) :) `fmap` toChapters ys - - let chapters' = evalState (toChapters blocks') [] - - let extractLinkURL' :: Int -> Inline -> [(String, String)] - extractLinkURL' num (Span (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] - extractLinkURL' _ _ = [] - - let extractLinkURL :: Int -> Block -> [(String, String)] - extractLinkURL num (Div (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] - extractLinkURL num (Header _ (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] - extractLinkURL num b = query (extractLinkURL' num) b - - let reftable = concat $ zipWith (\(Chapter _ bs) num -> - query (extractLinkURL num) bs) - chapters' [1..] - - let fixInternalReferences :: Inline -> Inline - fixInternalReferences (Link attr lab ('#':xs, tit)) = - case lookup xs reftable of - Just ys -> Link attr lab (ys, tit) - Nothing -> Link attr lab ('#':xs, tit) - fixInternalReferences x = x - - -- internal reference IDs change when we chunk the file, - -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. - -- this fixes that: - let chapters = map (\(Chapter mbnum bs) -> - Chapter mbnum $ walk fixInternalReferences bs) - chapters' - - let chapToEntry num (Chapter mbnum bs) = - mkEntry (showChapter num) <$> - (writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum } - $ case bs of - (Header _ _ xs : _) -> - -- remove notes or we get doubled footnotes - Pandoc (setMeta "title" (walk removeNote $ fromList xs) - nullMeta) bs - _ -> - Pandoc nullMeta bs) - - chapterEntries <- lift $ zipWithM chapToEntry [1..] chapters - - -- incredibly inefficient (TODO): - let containsMathML ent = epub3 && - "<math" `isInfixOf` (B8.unpack $ fromEntry ent) - let containsSVG ent = epub3 && - "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) - let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] - - -- contents.opf - let chapterNode ent = unode "item" ! - ([("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", "application/xhtml+xml")] - ++ case props ent of - [] -> [] - xs -> [("properties", unwords xs)]) - $ () - let chapterRefNode ent = unode "itemref" ! - [("idref", toId $ eRelativePath ent)] $ () - let pictureNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "application/octet-stream" - $ mediaTypeOf $ eRelativePath ent)] $ () - let fontNode ent = unode "item" ! - [("id", toId $ eRelativePath ent), - ("href", eRelativePath ent), - ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () - let plainTitle = case docTitle' meta of - [] -> case epubTitle metadata of - [] -> "UNTITLED" - (x:_) -> titleText x - x -> stringify x - - let tocTitle = fromMaybe plainTitle $ - metaValueToString <$> lookupMeta "toc-title" meta - uuid <- case epubIdentifier metadata of - (x:_) -> return $ identifierText x -- use first identifier as UUID - [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen - currentTime <- lift $ P.getCurrentTime - let contentsData = UTF8.fromStringLazy $ ppTopElement $ - unode "package" ! [("version", case version of - EPUB2 -> "2.0" - EPUB3 -> "3.0") - ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","epub-id-1")] $ - [ metadataElement version metadata currentTime - , unode "manifest" $ - [ unode "item" ! [("id","ncx"), ("href","toc.ncx") - ,("media-type","application/x-dtbncx+xml")] $ () - , unode "item" ! [("id","style"), ("href","stylesheet.css") - ,("media-type","text/css")] $ () - , unode "item" ! ([("id","nav") - ,("href","nav.xhtml") - ,("media-type","application/xhtml+xml")] ++ - [("properties","nav") | epub3 ]) $ () - ] ++ - map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - (case cpicEntry of - [] -> [] - (x:_) -> [add_attrs - [Attr (unqual "properties") "cover-image" | epub3] - (pictureNode x)]) ++ - map pictureNode picEntries ++ - map fontNode fontEntries - , unode "spine" ! ([("toc","ncx")] ++ progressionDirection) $ - case epubCoverImage metadata of - Nothing -> [] - Just _ -> [ unode "itemref" ! - [("idref", "cover_xhtml")] $ () ] - ++ ((unode "itemref" ! [("idref", "title_page_xhtml") - ,("linear", - case lookupMeta "title" meta of - Just _ -> "yes" - Nothing -> "no")] $ ()) : - [unode "itemref" ! [("idref", "nav")] $ () - | writerTableOfContents opts ] ++ - map chapterRefNode chapterEntries) - , unode "guide" $ - [ unode "reference" ! - [("type","toc"),("title", tocTitle), - ("href","nav.xhtml")] $ () - ] ++ - [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing - ] - ] - let contentsEntry = mkEntry "content.opf" contentsData - - -- toc.ncx - let secs = hierarchicalize blocks' - - let tocLevel = writerTOCDepth opts - - let navPointNode :: PandocMonad m - => (Int -> String -> String -> [Element] -> Element) - -> S.Element -> StateT Int m Element - navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do - n <- get - modify (+1) - let showNums :: [Int] -> String - showNums = intercalate "." . map show - let tit' = stringify ils - let tit = if writerNumberSections opts && not (null nums) - then showNums nums ++ " " ++ tit' - else tit' - src <- case lookup ident reftable of - Just x -> return x - Nothing -> throwError $ PandocSomeError $ ident ++ " not found in reftable" - let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel - isSec _ = False - let subsecs = filter isSec children - subs <- mapM (navPointNode formatter) subsecs - return $ formatter n tit src subs - navPointNode _ (S.Blk _) = throwError $ PandocSomeError "navPointNode encountered Blk" - - let navMapFormatter :: Int -> String -> String -> [Element] -> Element - navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" tit - , unode "content" ! [("src", src)] $ () - ] ++ subs - - let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) - , unode "content" ! [("src","title_page.xhtml")] $ () ] - - navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1 - let tocData = UTF8.fromStringLazy $ ppTopElement $ - unode "ncx" ! [("version","2005-1") - ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ - [ unode "head" $ - [ unode "meta" ! [("name","dtb:uid") - ,("content", uuid)] $ () - , unode "meta" ! [("name","dtb:depth") - ,("content", "1")] $ () - , unode "meta" ! [("name","dtb:totalPageCount") - ,("content", "0")] $ () - , unode "meta" ! [("name","dtb:maxPageNumber") - ,("content", "0")] $ () - ] ++ case epubCoverImage metadata of - Nothing -> [] - Just img -> [unode "meta" ! [("name","cover"), - ("content", toId img)] $ ()] - , unode "docTitle" $ unode "text" $ plainTitle - , unode "navMap" $ - tpNode : navMap - ] - let tocEntry = mkEntry "toc.ncx" tocData - - let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element - navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" ++ show n)] $ - (unode "a" ! [("href",src)] - $ tit) - : case subs of - [] -> [] - (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - - let navtag = if epub3 then "nav" else "div" - tocBlocks <- lift $ evalStateT (mapM (navPointNode navXhtmlFormatter) secs) 1 - let navBlocks = [RawBlock (Format "html") $ ppElement $ - unode navtag ! ([("epub:type","toc") | epub3] ++ - [("id","toc")]) $ - [ unode "h1" ! [("id","toc-title")] $ tocTitle - , unode "ol" ! [("class","toc")] $ tocBlocks ]] - let landmarks = if epub3 - then [RawBlock (Format "html") $ ppElement $ - unode "nav" ! [("epub:type","landmarks") - ,("hidden","hidden")] $ - [ unode "ol" $ - [ unode "li" - [ unode "a" ! [("href", "cover.xhtml") - ,("epub:type", "cover")] $ - "Cover"] | - epubCoverImage metadata /= Nothing - ] ++ - [ unode "li" - [ unode "a" ! [("href", "#toc") - ,("epub:type", "toc")] $ - "Table of contents" - ] | writerTableOfContents opts - ] - ] - ] - else [] - navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars } - (Pandoc (setMeta "title" - (walk removeNote $ fromList $ docTitle' meta) nullMeta) - (navBlocks ++ landmarks)) - let navEntry = mkEntry "nav.xhtml" navData - - -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" - - -- container.xml - let containerData = UTF8.fromStringLazy $ ppTopElement $ - unode "container" ! [("version","1.0") - ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ - unode "rootfiles" $ - unode "rootfile" ! [("full-path","content.opf") - ,("media-type","application/oebps-package+xml")] $ () - let containerEntry = mkEntry "META-INF/container.xml" containerData - - -- com.apple.ibooks.display-options.xml - let apple = UTF8.fromStringLazy $ ppTopElement $ - unode "display_options" $ - unode "platform" ! [("name","*")] $ - unode "option" ! [("name","specified-fonts")] $ "true" - let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple - - -- stylesheet - stylesheet <- case epubStylesheet metadata of - Just (StylesheetPath fp) -> UTF8.toStringLazy <$> (lift $ P.readFileLazy fp) - Just (StylesheetContents s) -> return s - Nothing -> UTF8.toString `fmap` - (lift $ P.readDataFile (writerUserDataDir opts) "epub.css") - let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet - - -- construct archive - let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : - contentsEntry : tocEntry : navEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) - return $ fromArchive archive - -metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element -metadataElement version md currentTime = - unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes - where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes - ++ creatorNodes ++ contributorNodes ++ subjectNodes - ++ descriptionNodes ++ typeNodes ++ formatNodes - ++ publisherNodes ++ sourceNodes ++ relationNodes - ++ coverageNodes ++ rightsNodes ++ coverImageNodes - ++ modifiedNodes - withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) - ([1..] :: [Int])) - identifierNodes = withIds "epub-id" toIdentifierNode $ - epubIdentifier md - titleNodes = withIds "epub-title" toTitleNode $ epubTitle md - dateNodes = if version == EPUB2 - then withIds "epub-date" toDateNode $ epubDate md - else -- epub3 allows only one dc:date - -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate - case epubDate md of - [] -> [] - (x:_) -> [dcNode "date" ! [("id","epub-date")] - $ dateText x] - languageNodes = [dcTag "language" $ epubLanguage md] - creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ - epubCreator md - contributorNodes = withIds "epub-contributor" - (toCreatorNode "contributor") $ epubContributor md - subjectNodes = map (dcTag "subject") $ epubSubject md - descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md - typeNodes = maybe [] (dcTag' "type") $ epubType md - formatNodes = maybe [] (dcTag' "format") $ epubFormat md - publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md - sourceNodes = maybe [] (dcTag' "source") $ epubSource md - relationNodes = maybe [] (dcTag' "relation") $ epubRelation md - coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md - rightsNodes = maybe [] (dcTag' "rights") $ epubRights md - coverImageNodes = maybe [] - (\img -> [unode "meta" ! [("name","cover"), - ("content",toId img)] $ ()]) - $ epubCoverImage md - modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3 ] - dcTag n s = unode ("dc:" ++ n) s - dcTag' n s = [dcTag n s] - toIdentifierNode id' (Identifier txt scheme) - | version == EPUB2 = [dcNode "identifier" ! - ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $ - txt] - | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++ - maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","identifier-type"), - ("scheme","onix:codelist5")] $ x]) - (schemeToOnix `fmap` scheme) - toCreatorNode s id' creator - | version == EPUB2 = [dcNode s ! - (("id",id') : - maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ - maybe [] (\x -> [("opf:role",x)]) - (creatorRole creator >>= toRelator)) $ creatorText creator] - | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ - maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) - (creatorFileAs creator) ++ - maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","role"), - ("scheme","marc:relators")] $ x]) - (creatorRole creator >>= toRelator) - toTitleNode id' title - | version == EPUB2 = [dcNode "title" ! - (("id",id') : - -- note: EPUB2 doesn't accept opf:title-type - maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $ - titleText title] - | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] - ++ - maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) - (titleFileAs title) ++ - maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","title-type")] $ x]) - (titleType title) - toDateNode id' date = [dcNode "date" ! - (("id",id') : - maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ - dateText date] - schemeToOnix "ISBN-10" = "02" - schemeToOnix "GTIN-13" = "03" - schemeToOnix "UPC" = "04" - schemeToOnix "ISMN-10" = "05" - schemeToOnix "DOI" = "06" - schemeToOnix "LCCN" = "13" - schemeToOnix "GTIN-14" = "14" - schemeToOnix "ISBN-13" = "15" - schemeToOnix "Legal deposit number" = "17" - schemeToOnix "URN" = "22" - schemeToOnix "OCLC" = "23" - schemeToOnix "ISMN-13" = "25" - schemeToOnix "ISBN-A" = "26" - schemeToOnix "JP" = "27" - schemeToOnix "OLCC" = "28" - schemeToOnix _ = "01" - -showDateTimeISO8601 :: UTCTime -> String -showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" - -transformTag :: PandocMonad m - => WriterOptions - -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Tag String - -> E m (Tag String) -transformTag opts tag@(TagOpen name attr) - | name `elem` ["video", "source", "img", "audio"] && - lookup "data-external" attr == Nothing = do - let src = fromAttrib "src" tag - let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef opts src - newposter <- modifyMediaRef opts poster - let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", newsrc) | not (null newsrc)] ++ - [("poster", newposter) | not (null newposter)] - return $ TagOpen name attr' -transformTag _ tag = return tag - -modifyMediaRef :: PandocMonad m - => WriterOptions - -> FilePath - -> E m FilePath -modifyMediaRef _ "" = return "" -modifyMediaRef opts oldsrc = do - media <- gets stMediaPaths - case lookup oldsrc media of - Just (n,_) -> return n - Nothing -> catchError - (do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc - let new = "media/file" ++ show (length media) ++ - fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) - epochtime <- floor `fmap` lift P.getPOSIXTime - let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img - modify $ \st -> st{ stMediaPaths = - (oldsrc, (new, Just entry)):media} - return new) - (\e -> do - report $ CouldNotFetchResource oldsrc (show e) - return oldsrc) - -transformBlock :: PandocMonad m - => WriterOptions - -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media - -> Block - -> E m Block -transformBlock opts (RawBlock fmt raw) - | fmt == Format "html" = do - let tags = parseTags raw - tags' <- mapM (transformTag opts) tags - return $ RawBlock fmt (renderTags' tags') -transformBlock _ b = return b - -transformInline :: PandocMonad m - => WriterOptions - -- -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media - -> Inline - -> E m Inline -transformInline opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef opts src - return $ Image attr lab (newsrc, tit) -transformInline opts (x@(Math t m)) - | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef opts (url ++ urlEncode m) - let mathclass = if t == DisplayMath then "display" else "inline" - return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")] -transformInline opts (RawInline fmt raw) - | fmt == Format "html" = do - let tags = parseTags raw - tags' <- mapM (transformTag opts) tags - return $ RawInline fmt (renderTags' tags') -transformInline _ x = return x - -(!) :: (t -> Element) -> [(String, String)] -> t -> Element -(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) - --- | Version of 'ppTopElement' that specifies UTF-8 encoding. -ppTopElement :: Element -> String -ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement - -- unEntity removes numeric entities introduced by ppElement - -- (kindlegen seems to choke on these). - where unEntity [] = "" - unEntity ('&':'#':xs) = - let (ds,ys) = break (==';') xs - rest = drop 1 ys - in case safeRead ('\'':'\\':ds ++ "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs - unEntity (x:xs) = x : unEntity xs - -mediaTypeOf :: FilePath -> Maybe MimeType -mediaTypeOf x = - let mediaPrefixes = ["image", "video", "audio"] in - case getMimeType x of - Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y - _ -> Nothing - --- Returns filename for chapter number. -showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" - --- Add identifiers to any headers without them. -addIdentifiers :: [Block] -> [Block] -addIdentifiers bs = evalState (mapM go bs) Set.empty - where go (Header n (ident,classes,kvs) ils) = do - ids <- get - let ident' = if null ident - then uniqueIdent ils ids - else ident - modify $ Set.insert ident' - return $ Header n (ident',classes,kvs) ils - go x = return x - --- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM -normalizeDate' :: String -> Maybe String -normalizeDate' xs = - let xs' = trim xs in - case xs' of - [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY - [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM - -> Just xs' - _ -> normalizeDate xs' - -toRelator :: String -> Maybe String -toRelator x - | x `elem` relators = Just x - | otherwise = lookup (map toLower x) relatorMap - -relators :: [String] -relators = map snd relatorMap - -relatorMap :: [(String, String)] -relatorMap = - [("abridger", "abr") - ,("actor", "act") - ,("adapter", "adp") - ,("addressee", "rcp") - ,("analyst", "anl") - ,("animator", "anm") - ,("annotator", "ann") - ,("appellant", "apl") - ,("appellee", "ape") - ,("applicant", "app") - ,("architect", "arc") - ,("arranger", "arr") - ,("art copyist", "acp") - ,("art director", "adi") - ,("artist", "art") - ,("artistic director", "ard") - ,("assignee", "asg") - ,("associated name", "asn") - ,("attributed name", "att") - ,("auctioneer", "auc") - ,("author", "aut") - ,("author in quotations or text abstracts", "aqt") - ,("author of afterword, colophon, etc.", "aft") - ,("author of dialog", "aud") - ,("author of introduction, etc.", "aui") - ,("autographer", "ato") - ,("bibliographic antecedent", "ant") - ,("binder", "bnd") - ,("binding designer", "bdd") - ,("blurb writer", "blw") - ,("book designer", "bkd") - ,("book producer", "bkp") - ,("bookjacket designer", "bjd") - ,("bookplate designer", "bpd") - ,("bookseller", "bsl") - ,("braille embosser", "brl") - ,("broadcaster", "brd") - ,("calligrapher", "cll") - ,("cartographer", "ctg") - ,("caster", "cas") - ,("censor", "cns") - ,("choreographer", "chr") - ,("cinematographer", "cng") - ,("client", "cli") - ,("collection registrar", "cor") - ,("collector", "col") - ,("collotyper", "clt") - ,("colorist", "clr") - ,("commentator", "cmm") - ,("commentator for written text", "cwt") - ,("compiler", "com") - ,("complainant", "cpl") - ,("complainant-appellant", "cpt") - ,("complainant-appellee", "cpe") - ,("composer", "cmp") - ,("compositor", "cmt") - ,("conceptor", "ccp") - ,("conductor", "cnd") - ,("conservator", "con") - ,("consultant", "csl") - ,("consultant to a project", "csp") - ,("contestant", "cos") - ,("contestant-appellant", "cot") - ,("contestant-appellee", "coe") - ,("contestee", "cts") - ,("contestee-appellant", "ctt") - ,("contestee-appellee", "cte") - ,("contractor", "ctr") - ,("contributor", "ctb") - ,("copyright claimant", "cpc") - ,("copyright holder", "cph") - ,("corrector", "crr") - ,("correspondent", "crp") - ,("costume designer", "cst") - ,("court governed", "cou") - ,("court reporter", "crt") - ,("cover designer", "cov") - ,("creator", "cre") - ,("curator", "cur") - ,("dancer", "dnc") - ,("data contributor", "dtc") - ,("data manager", "dtm") - ,("dedicatee", "dte") - ,("dedicator", "dto") - ,("defendant", "dfd") - ,("defendant-appellant", "dft") - ,("defendant-appellee", "dfe") - ,("degree granting institution", "dgg") - ,("delineator", "dln") - ,("depicted", "dpc") - ,("depositor", "dpt") - ,("designer", "dsr") - ,("director", "drt") - ,("dissertant", "dis") - ,("distribution place", "dbp") - ,("distributor", "dst") - ,("donor", "dnr") - ,("draftsman", "drm") - ,("dubious author", "dub") - ,("editor", "edt") - ,("editor of compilation", "edc") - ,("editor of moving image work", "edm") - ,("electrician", "elg") - ,("electrotyper", "elt") - ,("enacting jurisdiction", "enj") - ,("engineer", "eng") - ,("engraver", "egr") - ,("etcher", "etr") - ,("event place", "evp") - ,("expert", "exp") - ,("facsimilist", "fac") - ,("field director", "fld") - ,("film director", "fmd") - ,("film distributor", "fds") - ,("film editor", "flm") - ,("film producer", "fmp") - ,("filmmaker", "fmk") - ,("first party", "fpy") - ,("forger", "frg") - ,("former owner", "fmo") - ,("funder", "fnd") - ,("geographic information specialist", "gis") - ,("honoree", "hnr") - ,("host", "hst") - ,("host institution", "his") - ,("illuminator", "ilu") - ,("illustrator", "ill") - ,("inscriber", "ins") - ,("instrumentalist", "itr") - ,("interviewee", "ive") - ,("interviewer", "ivr") - ,("inventor", "inv") - ,("issuing body", "isb") - ,("judge", "jud") - ,("jurisdiction governed", "jug") - ,("laboratory", "lbr") - ,("laboratory director", "ldr") - ,("landscape architect", "lsa") - ,("lead", "led") - ,("lender", "len") - ,("libelant", "lil") - ,("libelant-appellant", "lit") - ,("libelant-appellee", "lie") - ,("libelee", "lel") - ,("libelee-appellant", "let") - ,("libelee-appellee", "lee") - ,("librettist", "lbt") - ,("licensee", "lse") - ,("licensor", "lso") - ,("lighting designer", "lgd") - ,("lithographer", "ltg") - ,("lyricist", "lyr") - ,("manufacture place", "mfp") - ,("manufacturer", "mfr") - ,("marbler", "mrb") - ,("markup editor", "mrk") - ,("metadata contact", "mdc") - ,("metal-engraver", "mte") - ,("moderator", "mod") - ,("monitor", "mon") - ,("music copyist", "mcp") - ,("musical director", "msd") - ,("musician", "mus") - ,("narrator", "nrt") - ,("onscreen presenter", "osp") - ,("opponent", "opn") - ,("organizer of meeting", "orm") - ,("originator", "org") - ,("other", "oth") - ,("owner", "own") - ,("panelist", "pan") - ,("papermaker", "ppm") - ,("patent applicant", "pta") - ,("patent holder", "pth") - ,("patron", "pat") - ,("performer", "prf") - ,("permitting agency", "pma") - ,("photographer", "pht") - ,("plaintiff", "ptf") - ,("plaintiff-appellant", "ptt") - ,("plaintiff-appellee", "pte") - ,("platemaker", "plt") - ,("praeses", "pra") - ,("presenter", "pre") - ,("printer", "prt") - ,("printer of plates", "pop") - ,("printmaker", "prm") - ,("process contact", "prc") - ,("producer", "pro") - ,("production company", "prn") - ,("production designer", "prs") - ,("production manager", "pmn") - ,("production personnel", "prd") - ,("production place", "prp") - ,("programmer", "prg") - ,("project director", "pdr") - ,("proofreader", "pfr") - ,("provider", "prv") - ,("publication place", "pup") - ,("publisher", "pbl") - ,("publishing director", "pbd") - ,("puppeteer", "ppt") - ,("radio director", "rdd") - ,("radio producer", "rpc") - ,("recording engineer", "rce") - ,("recordist", "rcd") - ,("redaktor", "red") - ,("renderer", "ren") - ,("reporter", "rpt") - ,("repository", "rps") - ,("research team head", "rth") - ,("research team member", "rtm") - ,("researcher", "res") - ,("respondent", "rsp") - ,("respondent-appellant", "rst") - ,("respondent-appellee", "rse") - ,("responsible party", "rpy") - ,("restager", "rsg") - ,("restorationist", "rsr") - ,("reviewer", "rev") - ,("rubricator", "rbr") - ,("scenarist", "sce") - ,("scientific advisor", "sad") - ,("screenwriter", "aus") - ,("scribe", "scr") - ,("sculptor", "scl") - ,("second party", "spy") - ,("secretary", "sec") - ,("seller", "sll") - ,("set designer", "std") - ,("setting", "stg") - ,("signer", "sgn") - ,("singer", "sng") - ,("sound designer", "sds") - ,("speaker", "spk") - ,("sponsor", "spn") - ,("stage director", "sgd") - ,("stage manager", "stm") - ,("standards body", "stn") - ,("stereotyper", "str") - ,("storyteller", "stl") - ,("supporting host", "sht") - ,("surveyor", "srv") - ,("teacher", "tch") - ,("technical director", "tcd") - ,("television director", "tld") - ,("television producer", "tlp") - ,("thesis advisor", "ths") - ,("transcriber", "trc") - ,("translator", "trl") - ,("type designer", "tyd") - ,("typographer", "tyg") - ,("university place", "uvp") - ,("videographer", "vdg") - ,("witness", "wit") - ,("wood engraver", "wde") - ,("woodcutter", "wdc") - ,("writer of accompanying material", "wam") - ,("writer of added commentary", "wac") - ,("writer of added lyrics", "wal") - ,("writer of added text", "wat") - ] - -docTitle' :: Meta -> [Inline] -docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta - where go (MetaString s) = [Str s] - go (MetaInlines xs) = xs - go (MetaBlocks [Para xs]) = xs - go (MetaBlocks [Plain xs]) = xs - go (MetaMap m) = - case M.lookup "type" m of - Just x | stringify x == "main" -> - maybe [] go $ M.lookup "text" m - _ -> [] - go (MetaList xs) = concatMap go xs - go _ = [] diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs deleted file mode 100644 index 967fe6a4c..000000000 --- a/src/Text/Pandoc/Writers/FB2.hs +++ /dev/null @@ -1,617 +0,0 @@ -{-# LANGUAGE PatternGuards #-} - -{- -Copyright (c) 2011-2012, Sergey Astanin -All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. - -FictionBook is an XML-based e-book format. For more information see: -<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> - --} -module Text.Pandoc.Writers.FB2 (writeFB2) where - -import Control.Monad.State (StateT, evalStateT, get, modify, lift) -import Control.Monad.State (liftM) -import Data.ByteString.Base64 (encode) -import Data.Char (toLower, isSpace, isAscii, isControl) -import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix) -import Data.Either (lefts, rights) -import Network.HTTP (urlEncode) -import Network.URI (isURI) -import Text.XML.Light -import qualified Text.XML.Light as X -import qualified Text.XML.Light.Cursor as XC -import qualified Data.ByteString.Char8 as B8 -import Control.Monad.Except (throwError, catchError) - -import Text.Pandoc.Logging -import Text.Pandoc.Definition -import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize, - linesToPara) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import qualified Text.Pandoc.Class as P - --- | Data to be written at the end of the document: --- (foot)notes, URLs, references, images. -data FbRenderState = FbRenderState - { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text - , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path - , parentListMarker :: String -- ^ list marker of the parent ordered list - , parentBulletLevel :: Int -- ^ nesting level of the unordered list - , writerOptions :: WriterOptions - } deriving (Show) - --- | FictionBook building monad. -type FBM m = StateT FbRenderState m - -newFB :: FbRenderState -newFB = FbRenderState { footnotes = [], imagesToFetch = [] - , parentListMarker = "", parentBulletLevel = 0 - , writerOptions = def } - -data ImageMode = NormalImage | InlineImage deriving (Eq) -instance Show ImageMode where - show NormalImage = "imageType" - show InlineImage = "inlineImageType" - --- | Produce an FB2 document from a 'Pandoc' document. -writeFB2 :: PandocMonad m - => WriterOptions -- ^ conversion options - -> Pandoc -- ^ document to convert - -> m String -- ^ FictionBook2 document (not encoded yet) -writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc - -pandocToFB2 :: PandocMonad m - => WriterOptions - -> Pandoc - -> FBM m String -pandocToFB2 opts (Pandoc meta blocks) = do - modify (\s -> s { writerOptions = opts }) - desc <- description meta - fp <- frontpage meta - secs <- renderSections 1 blocks - let body = el "body" $ fp ++ secs - notes <- renderFootnotes - (imgs,missing) <- liftM imagesToFetch get >>= \s -> lift (fetchImages s) - let body' = replaceImagesWithAlt missing body - let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) ++ "\n" - where - xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" - fb2_attrs = - let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0" - xlink = "http://www.w3.org/1999/xlink" - in [ uattr "xmlns" xmlns - , attr ("xmlns", "l") xlink ] - -frontpage :: PandocMonad m => Meta -> FBM m [Content] -frontpage meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ - [ el "title" (el "p" t) - , el "annotation" (map (el "p" . cMap plain) - (docAuthors meta' ++ [docDate meta'])) - ] - -description :: PandocMonad m => Meta -> FBM m Content -description meta' = do - bt <- booktitle meta' - let as = authors meta' - dd <- docdate meta' - return $ el "description" - [ el "title-info" (bt ++ as ++ dd) - , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version - ] - -booktitle :: PandocMonad m => Meta -> FBM m [Content] -booktitle meta' = do - t <- cMapM toXml . docTitle $ meta' - return $ if null t - then [] - else [ el "book-title" t ] - -authors :: Meta -> [Content] -authors meta' = cMap author (docAuthors meta') - -author :: [Inline] -> [Content] -author ss = - let ws = words . cMap plain $ ss - email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws - names = case ws' of - (nickname:[]) -> [ el "nickname" nickname ] - (fname:lname:[]) -> [ el "first-name" fname - , el "last-name" lname ] - (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) - , el "last-name" (last rest) ] - ([]) -> [] - in list $ el "author" (names ++ email) - -docdate :: PandocMonad m => Meta -> FBM m [Content] -docdate meta' = do - let ss = docDate meta' - d <- cMapM toXml ss - return $ if null d - then [] - else [el "date" d] - --- | Divide the stream of blocks into sections and convert to XML --- representation. -renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content] -renderSections level blocks = do - let secs = splitSections level blocks - mapM (renderSection level) secs - -renderSection :: PandocMonad m => Int -> ([Inline], [Block]) -> FBM m Content -renderSection level (ttl, body) = do - title <- if null ttl - then return [] - else return . list . el "title" . formatTitle $ ttl - content <- if (hasSubsections body) - then renderSections (level + 1) body - else cMapM blockToXml body - return $ el "section" (title ++ content) - where - hasSubsections = any isHeaderBlock - --- | Only <p> and <empty-line> are allowed within <title> in FB2. -formatTitle :: [Inline] -> [Content] -formatTitle inlines = - let lns = split isLineBreak inlines - lns' = map (el "p" . cMap plain) lns - in intersperse (el "empty-line" ()) lns' - -split :: (a -> Bool) -> [a] -> [[a]] -split _ [] = [] -split cond xs = let (b,a) = break cond xs - in (b:split cond (drop 1 a)) - -isLineBreak :: Inline -> Bool -isLineBreak LineBreak = True -isLineBreak _ = False - --- | Divide the stream of block elements into sections: [(title, blocks)]. -splitSections :: Int -> [Block] -> [([Inline], [Block])] -splitSections level blocks = reverse $ revSplit (reverse blocks) - where - revSplit [] = [] - revSplit rblocks = - let (lastsec, before) = break sameLevel rblocks - (header, prevblocks) = - case before of - ((Header n _ title):prevblocks') -> - if n == level - then (title, prevblocks') - else ([], before) - _ -> ([], before) - in (header, reverse lastsec) : revSplit prevblocks - sameLevel (Header n _ _) = n == level - sameLevel _ = False - --- | Make another FictionBook body with footnotes. -renderFootnotes :: PandocMonad m => FBM m [Content] -renderFootnotes = do - fns <- footnotes `liftM` get - if null fns - then return [] -- no footnotes - else return . list $ - el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) - where - renderFN (n, idstr, cs) = - let fn_texts = (el "title" (el "p" (show n))) : cs - in el "section" ([uattr "id" idstr], fn_texts) - --- | Fetch images and encode them for the FictionBook XML. --- Return image data and a list of hrefs of the missing images. -fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) -fetchImages links = do - imgs <- mapM (uncurry fetchImage) links - return $ (rights imgs, lefts imgs) - --- | Fetch image data from disk or from network and make a <binary> XML section. --- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: PandocMonad m => String -> String -> m (Either String Content) -fetchImage href link = do - mbimg <- - case (isURI link, readDataURI link) of - (True, Just (mime,_,True,base64)) -> - let mime' = map toLower mime - in if mime' == "image/png" || mime' == "image/jpeg" - then return (Just (mime',base64)) - else return Nothing - (True, Just _) -> return Nothing -- not base64-encoded - _ -> do - catchError (do (bs, mbmime) <- P.fetchItem Nothing link - case mbmime of - Nothing -> do - report $ CouldNotDetermineMimeType link - return Nothing - Just mime -> return $ Just (mime, - B8.unpack $ encode bs)) - (\e -> - do report $ CouldNotFetchResource link (show e) - return Nothing) - case mbimg of - Just (imgtype, imgdata) -> do - return . Right $ el "binary" - ( [uattr "id" href - , uattr "content-type" imgtype] - , txt imgdata ) - _ -> return (Left ('#':href)) - - --- | Extract mime type and encoded data from the Data URI. -readDataURI :: String -- ^ URI - -> Maybe (String,String,Bool,String) - -- ^ Maybe (mime,charset,isBase64,data) -readDataURI uri = - case stripPrefix "data:" uri of - Nothing -> Nothing - Just rest -> - let meta = takeWhile (/= ',') rest -- without trailing ',' - uridata = drop (length meta + 1) rest - parts = split (== ';') meta - (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts - in Just (mime,cs,enc,uridata) - - where - upd str m@(mime,cs,enc) - | isMimeType str = (str,cs,enc) - | Just str' <- stripPrefix "charset=" str = (mime,str',enc) - | str == "base64" = (mime,cs,True) - | otherwise = m - --- Without parameters like ;charset=...; see RFC 2045, 5.1 -isMimeType :: String -> Bool -isMimeType s = - case split (=='/') s of - [mtype,msubtype] -> - ((map toLower mtype) `elem` types - || "x-" `isPrefixOf` (map toLower mtype)) - && all valid mtype - && all valid msubtype - _ -> False - where - types = ["text","image","audio","video","application","message","multipart"] - valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` "()<>@,;:\\\"/[]?=" - -footnoteID :: Int -> String -footnoteID i = "n" ++ (show i) - -linkID :: Int -> String -linkID i = "l" ++ (show i) - --- | Convert a block-level Pandoc's element to FictionBook XML representation. -blockToXml :: PandocMonad m => Block -> FBM m [Content] -blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 -blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula --- title beginning with fig: indicates that the image is a figure -blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) = - insertImage NormalImage (Image atr alt (src,tit)) -blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss -blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code") . lines $ s -blockToXml b@(RawBlock _ _) = do - report $ BlockNotRendered b - return [] -blockToXml (Div _ bs) = cMapM blockToXml bs -blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs -blockToXml (LineBlock lns) = blockToXml $ linesToPara lns -blockToXml (OrderedList a bss) = do - state <- get - let pmrk = parentListMarker state - let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a - let mkitem mrk bs = do - modify (\s -> s { parentListMarker = mrk }) - itemtext <- cMapM blockToXml . paraToPlain $ bs - modify (\s -> s { parentListMarker = pmrk }) -- old parent marker - return . el "p" $ [ txt mrk, txt " " ] ++ itemtext - mapM (uncurry mkitem) (zip markers bss) -blockToXml (BulletList bss) = do - state <- get - let level = parentBulletLevel state - let pmrk = parentListMarker state - let prefix = replicate (length pmrk) ' ' - let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] - let mrk = prefix ++ bullets !! (level `mod` (length bullets)) - let mkitem bs = do - modify (\s -> s { parentBulletLevel = (level+1) }) - itemtext <- cMapM blockToXml . paraToPlain $ bs - modify (\s -> s { parentBulletLevel = level }) -- restore bullet level - return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext - mapM mkitem bss -blockToXml (DefinitionList defs) = - cMapM mkdef defs - where - mkdef (term, bss) = do - def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss - t <- wrap "strong" term - return [ el "p" t, el "p" def' ] - sep blocks = - if all needsBreak blocks then - blocks ++ [Plain [LineBreak]] - else - blocks - needsBreak (Para _) = False - needsBreak (Plain ins) = LineBreak `notElem` ins - needsBreak _ = True -blockToXml (Header _ _ _) = -- should never happen, see renderSections - throwError $ PandocShouldNeverHappenError "unexpected header in section text" -blockToXml HorizontalRule = return - [ el "empty-line" () - , el "p" (txt (replicate 10 '—')) - , el "empty-line" () ] -blockToXml (Table caption aligns _ headers rows) = do - hd <- mkrow "th" headers aligns - bd <- mapM (\r -> mkrow "td" r aligns) rows - c <- return . el "emphasis" =<< cMapM toXml caption - return [el "table" (hd : bd), el "p" c] - where - mkrow :: PandocMonad m => String -> [TableCell] -> [Alignment] -> FBM m Content - mkrow tag cells aligns' = - (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) - -- - mkcell :: PandocMonad m => String -> (TableCell, Alignment) -> FBM m Content - mkcell tag (cell, align) = do - cblocks <- cMapM blockToXml cell - return $ el tag ([align_attr align], cblocks) - -- - align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) - align_str AlignLeft = "left" - align_str AlignCenter = "center" - align_str AlignRight = "right" - align_str AlignDefault = "left" -blockToXml Null = return [] - --- Replace paragraphs with plain text and line break. --- Necessary to simulate multi-paragraph lists in FB2. -paraToPlain :: [Block] -> [Block] -paraToPlain [] = [] -paraToPlain (Para inlines : rest) = - let p = (Plain (inlines ++ [LineBreak])) - in p : paraToPlain rest -paraToPlain (p:rest) = p : paraToPlain rest - --- Simulate increased indentation level. Will not really work --- for multi-line paragraphs. -indent :: Block -> Block -indent = indentBlock - where - -- indentation space - spacer :: String - spacer = replicate 4 ' ' - -- - indentBlock (Plain ins) = Plain ((Str spacer):ins) - indentBlock (Para ins) = Para ((Str spacer):ins) - indentBlock (CodeBlock a s) = - let s' = unlines . map (spacer++) . lines $ s - in CodeBlock a s' - indentBlock (BlockQuote bs) = BlockQuote (map indent bs) - indentBlock (Header l attr' ins) = Header l attr' (indentLines ins) - indentBlock everythingElse = everythingElse - -- indent every (explicit) line - indentLines :: [Inline] -> [Inline] - indentLines ins = let lns = split isLineBreak ins :: [[Inline]] - in intercalate [LineBreak] $ map ((Str spacer):) lns - --- | Convert a Pandoc's Inline element to FictionBook XML representation. -toXml :: PandocMonad m => Inline -> FBM m [Content] -toXml (Str s) = return [txt s] -toXml (Span _ ils) = cMapM toXml ils -toXml (Emph ss) = list `liftM` wrap "emphasis" ss -toXml (Strong ss) = list `liftM` wrap "strong" ss -toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss -toXml (Superscript ss) = list `liftM` wrap "sup" ss -toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ capitalize ss -toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific - inner <- cMapM toXml ss - return $ [txt "‘"] ++ inner ++ [txt "’"] -toXml (Quoted DoubleQuote ss) = do - inner <- cMapM toXml ss - return $ [txt "“"] ++ inner ++ [txt "”"] -toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" s] -toXml Space = return [txt " "] -toXml SoftBreak = return [txt " "] -toXml LineBreak = return [el "empty-line" ()] -toXml (Math _ formula) = insertMath InlineImage formula -toXml il@(RawInline _ _) = do - report $ InlineNotRendered il - return [] -- raw TeX and raw HTML are suppressed -toXml (Link _ text (url,ttl)) = do - fns <- footnotes `liftM` get - let n = 1 + length fns - let ln_id = linkID n - let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]" - ln_text <- cMapM toXml text - let ln_desc = - let ttl' = dropWhile isSpace ttl - in if null ttl' - then list . el "p" $ el "code" url - else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ] - modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns }) - return $ ln_text ++ - [ el "a" - ( [ attr ("l","href") ('#':ln_id) - , uattr "type" "note" ] - , ln_ref) ] -toXml img@(Image _ _ _) = insertImage InlineImage img -toXml (Note bs) = do - fns <- footnotes `liftM` get - let n = 1 + length fns - let fn_id = footnoteID n - fn_desc <- cMapM blockToXml bs - modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) - let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]" - return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) - , uattr "type" "note" ] - , fn_ref ) - -insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] -insertMath immode formula = do - htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get - case htmlMath of - WebTeX url -> do - let alt = [Code nullAttr formula] - let imgurl = url ++ urlEncode formula - let img = Image nullAttr alt (imgurl, "") - insertImage immode img - _ -> return [el "code" formula] - -insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] -insertImage immode (Image _ alt (url,ttl)) = do - images <- imagesToFetch `liftM` get - let n = 1 + length images - let fname = "image" ++ show n - modify (\s -> s { imagesToFetch = (fname, url) : images }) - let ttlattr = case (immode, null ttl) of - (NormalImage, False) -> [ uattr "title" ttl ] - _ -> [] - return . list $ - el "image" $ - [ attr ("l","href") ('#':fname) - , attr ("l","type") (show immode) - , uattr "alt" (cMap plain alt) ] - ++ ttlattr -insertImage _ _ = error "unexpected inline instead of image" - -replaceImagesWithAlt :: [String] -> Content -> Content -replaceImagesWithAlt missingHrefs body = - let cur = XC.fromContent body - cur' = replaceAll cur - in XC.toTree . XC.root $ cur' - where - -- - replaceAll :: XC.Cursor -> XC.Cursor - replaceAll c = - let n = XC.current c - c' = if isImage n && isMissing n - then XC.modifyContent replaceNode c - else c - in case XC.nextDF c' of - (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document - -- - isImage :: Content -> Bool - isImage (Elem e) = (elName e) == (uname "image") - isImage _ = False - -- - isMissing (Elem img@(Element _ _ _ _)) = - let imgAttrs = elAttribs img - badAttrs = map (attr ("l","href")) missingHrefs - in any (`elem` imgAttrs) badAttrs - isMissing _ = False - -- - replaceNode :: Content -> Content - replaceNode n@(Elem img@(Element _ _ _ _)) = - let attrs = elAttribs img - alt = getAttrVal attrs (uname "alt") - imtype = getAttrVal attrs (qname "l" "type") - in case (alt, imtype) of - (Just alt', Just imtype') -> - if imtype' == show NormalImage - then el "p" alt' - else txt alt' - (Just alt', Nothing) -> txt alt' -- no type attribute - _ -> n -- don't replace if alt text is not found - replaceNode n = n - -- - getAttrVal :: [X.Attr] -> QName -> Maybe String - getAttrVal attrs name = - case filter ((name ==) . attrKey) attrs of - (a:_) -> Just (attrVal a) - _ -> Nothing - - --- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: PandocMonad m => String -> [Inline] -> FBM m Content -wrap tagname inlines = el tagname `liftM` cMapM toXml inlines - --- " Create a singleton list. -list :: a -> [a] -list = (:[]) - --- | Convert an 'Inline' to plaintext. -plain :: Inline -> String -plain (Str s) = s -plain (Emph ss) = concat (map plain ss) -plain (Span _ ss) = concat (map plain ss) -plain (Strong ss) = concat (map plain ss) -plain (Strikeout ss) = concat (map plain ss) -plain (Superscript ss) = concat (map plain ss) -plain (Subscript ss) = concat (map plain ss) -plain (SmallCaps ss) = concat (map plain ss) -plain (Quoted _ ss) = concat (map plain ss) -plain (Cite _ ss) = concat (map plain ss) -- FIXME -plain (Code _ s) = s -plain Space = " " -plain SoftBreak = " " -plain LineBreak = "\n" -plain (Math _ s) = s -plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"]) -plain (Image _ alt _) = concat (map plain alt) -plain (Note _) = "" -- FIXME - --- | Create an XML element. -el :: (Node t) - => String -- ^ unqualified element name - -> t -- ^ node contents - -> Content -- ^ XML content -el name cs = Elem $ unode name cs - --- | Put empty lines around content -spaceBeforeAfter :: [Content] -> [Content] -spaceBeforeAfter cs = - let emptyline = el "empty-line" () - in [emptyline] ++ cs ++ [emptyline] - --- | Create a plain-text XML content. -txt :: String -> Content -txt s = Text $ CData CDataText s Nothing - --- | Create an XML attribute with an unqualified name. -uattr :: String -> String -> Text.XML.Light.Attr -uattr name val = Attr (uname name) val - --- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> String -> Text.XML.Light.Attr -attr (ns, name) val = Attr (qname ns name) val - --- | Unqualified name -uname :: String -> QName -uname name = QName name Nothing Nothing - --- | Qualified name -qname :: String -> String -> QName -qname ns name = QName name Nothing (Just ns) - --- | Abbreviation for 'concatMap'. -cMap :: (a -> [b]) -> [a] -> [b] -cMap = concatMap - --- | Monadic equivalent of 'concatMap'. -cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] -cMapM f xs = concat `liftM` mapM f xs diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs deleted file mode 100644 index 99f8c5b42..000000000 --- a/src/Text/Pandoc/Writers/HTML.hs +++ /dev/null @@ -1,1069 +0,0 @@ -{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to HTML. --} -module Text.Pandoc.Writers.HTML ( - writeHtml4, - writeHtml4String, - writeHtml5, - writeHtml5String, - writeHtmlStringForEPUB, - writeS5, - writeSlidy, - writeSlideous, - writeDZSlides, - writeRevealJs - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Data.Monoid ((<>)) -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates -import Text.Pandoc.Writers.Math -import Text.Pandoc.Slides -import Text.Pandoc.Highlighting ( highlight, styleToCss, - formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities, escapeStringForXML) -import Network.URI ( parseURIReference, URI(..), unEscapeString ) -import Network.HTTP ( urlEncode ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe, isJust ) -import Control.Monad.State -import Text.Blaze.Html hiding(contents) -#if MIN_VERSION_blaze_markup(0,6,3) -#else -import Text.Blaze.Internal(preEscapedString) -#endif -#if MIN_VERSION_blaze_html(0,5,1) -import qualified Text.Blaze.XHtml5 as H5 -#else -import qualified Text.Blaze.Html5 as H5 -#endif -import qualified Text.Blaze.XHtml1.Transitional as H -import qualified Text.Blaze.XHtml1.Transitional.Attributes as A -import Text.Blaze.Html.Renderer.String (renderHtml) -import Text.TeXMath -import Text.XML.Light.Output -import Text.XML.Light (unode, elChildren, unqual) -import qualified Text.XML.Light as XML -import System.FilePath (takeExtension) -import Data.Aeson (Value) -import Control.Monad.Except (throwError) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - -data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stMath :: Bool -- ^ Math is used in document - , stQuotes :: Bool -- ^ <q> tag is used - , stHighlighting :: Bool -- ^ Syntax highlighting is used - , stSecNum :: [Int] -- ^ Number of current section - , stElement :: Bool -- ^ Processing an Element - , stHtml5 :: Bool -- ^ Use HTML5 - , stEPUBVersion :: Maybe EPUBVersion -- ^ EPUB version if for epub - , stSlideVariant :: HTMLSlideVariant - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, - stHighlighting = False, stSecNum = [], - stElement = False, stHtml5 = False, - stEPUBVersion = Nothing, - stSlideVariant = NoSlides} - --- Helpers to render HTML with the appropriate function. - -strToHtml :: String -> Html -strToHtml ('\'':xs) = preEscapedString "\'" `mappend` strToHtml xs -strToHtml xs@(_:_) = case break (=='\'') xs of - (_ ,[]) -> toHtml xs - (ys,zs) -> toHtml ys `mappend` strToHtml zs -strToHtml [] = "" - --- | Hard linebreak. -nl :: WriterOptions -> Html -nl opts = if writerWrapText opts == WrapNone - then mempty - else preEscapedString "\n" - --- | Convert Pandoc document to Html 5 string. -writeHtml5String :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeHtml5String = writeHtmlString' - defaultWriterState{ stHtml5 = True } - --- | Convert Pandoc document to Html 5 structure. -writeHtml5 :: PandocMonad m => WriterOptions -> Pandoc -> m Html -writeHtml5 = writeHtml' defaultWriterState{ stHtml5 = True } - --- | Convert Pandoc document to Html 4 string. -writeHtml4String :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeHtml4String = writeHtmlString' - defaultWriterState{ stHtml5 = False } - --- | Convert Pandoc document to Html 4 structure. -writeHtml4 :: PandocMonad m => WriterOptions -> Pandoc -> m Html -writeHtml4 = writeHtml' defaultWriterState{ stHtml5 = False } - --- | Convert Pandoc document to Html appropriate for an epub version. -writeHtmlStringForEPUB :: PandocMonad m - => EPUBVersion -> WriterOptions -> Pandoc -> m String -writeHtmlStringForEPUB version = writeHtmlString' - defaultWriterState{ stHtml5 = version == EPUB3, - stEPUBVersion = Just version } - --- | Convert Pandoc document to Reveal JS HTML slide show. -writeRevealJs :: PandocMonad m - => WriterOptions -> Pandoc -> m String -writeRevealJs = writeHtmlSlideShow' RevealJsSlides - --- | Convert Pandoc document to S5 HTML slide show. -writeS5 :: PandocMonad m - => WriterOptions -> Pandoc -> m String -writeS5 = writeHtmlSlideShow' S5Slides - --- | Convert Pandoc document to Slidy HTML slide show. -writeSlidy :: PandocMonad m - => WriterOptions -> Pandoc -> m String -writeSlidy = writeHtmlSlideShow' SlidySlides - --- | Convert Pandoc document to Slideous HTML slide show. -writeSlideous :: PandocMonad m - => WriterOptions -> Pandoc -> m String -writeSlideous = writeHtmlSlideShow' SlideousSlides - --- | Convert Pandoc document to DZSlides HTML slide show. -writeDZSlides :: PandocMonad m - => WriterOptions -> Pandoc -> m String -writeDZSlides = writeHtmlSlideShow' DZSlides - -writeHtmlSlideShow' :: PandocMonad m - => HTMLSlideVariant -> WriterOptions -> Pandoc -> m String -writeHtmlSlideShow' variant = writeHtmlString' - defaultWriterState{ stSlideVariant = variant - , stHtml5 = case variant of - RevealJsSlides -> True - S5Slides -> False - SlidySlides -> False - DZSlides -> True - SlideousSlides -> False - NoSlides -> False - } - -writeHtmlString' :: PandocMonad m - => WriterState -> WriterOptions -> Pandoc -> m String -writeHtmlString' st opts d = do - (body, context) <- evalStateT (pandocToHtml opts d) st - return $ case writerTemplate opts of - Nothing -> renderHtml body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context - -writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html -writeHtml' st opts d = do - (body, context) <- evalStateT (pandocToHtml opts d) st - return $ case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl $ - defField "body" (renderHtml body) context - --- result is (title, authors, date, toc, body, new variables) -pandocToHtml :: PandocMonad m - => WriterOptions - -> Pandoc - -> StateT WriterState m (Html, Value) -pandocToHtml opts (Pandoc meta blocks) = do - metadata <- metaToJSON opts - (fmap renderHtml . blockListToHtml opts) - (fmap renderHtml . inlineListToHtml opts) - meta - let stringifyHTML = escapeStringForXML . stringify - let authsMeta = map stringifyHTML $ docAuthors meta - let dateMeta = stringifyHTML $ docDate meta - let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts - slideVariant <- gets stSlideVariant - let sects = hierarchicalize $ - if slideVariant == NoSlides - then blocks - else prepSlides slideLevel blocks - toc <- if writerTableOfContents opts && slideVariant /= S5Slides - then tableOfContents opts sects - else return Nothing - blocks' <- liftM (mconcat . intersperse (nl opts)) $ - mapM (elementToHtml slideLevel opts) sects - st <- get - notes <- footnoteSection opts (reverse (stNotes st)) - let thebody = blocks' >> notes - let math = case writerHTMLMathMethod opts of - LaTeXMathML (Just url) -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ mempty - MathJax url -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ case slideVariant of - SlideousSlides -> - preEscapedString - "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" - _ -> mempty - JsMath (Just url) -> - H.script ! A.src (toValue url) - ! A.type_ "text/javascript" - $ mempty - KaTeX js css -> - (H.script ! A.src (toValue js) $ mempty) <> - (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> - (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) - _ -> case lookup "mathml-script" (writerVariables opts) of - Just s | not (stHtml5 st) -> - H.script ! A.type_ "text/javascript" - $ preEscapedString - ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") - | otherwise -> mempty - Nothing -> mempty - let context = (if stHighlighting st - then case writerHighlightStyle opts of - Just sty -> defField "highlighting-css" - (styleToCss sty) - Nothing -> id - else id) $ - (if stMath st - then defField "math" (renderHtml math) - else id) $ - defField "quotes" (stQuotes st) $ - maybe id (defField "toc" . renderHtml) toc $ - defField "author-meta" authsMeta $ - maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringifyHTML $ docTitle meta) $ - defField "idprefix" (writerIdentifierPrefix opts) $ - -- these should maybe be set in pandoc.hs - defField "slidy-url" - ("http://www.w3.org/Talks/Tools/Slidy2" :: String) $ - defField "slideous-url" ("slideous" :: String) $ - defField "revealjs-url" ("reveal.js" :: String) $ - defField "s5-url" ("s5/default" :: String) $ - defField "html5" (stHtml5 st) $ - metadata - return (thebody, context) - --- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix -prefixedId :: WriterOptions -> String -> Attribute -prefixedId opts s = - case s of - "" -> mempty - _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s - -toList :: PandocMonad m - => (Html -> Html) - -> WriterOptions - -> [Html] - -> StateT WriterState m Html -toList listop opts items = do - slideVariant <- gets stSlideVariant - return $ - if (writerIncremental opts) - then if (slideVariant /= RevealJsSlides) - then (listop $ mconcat items) ! A.class_ "incremental" - else listop $ mconcat $ map (! A.class_ "fragment") items - else listop $ mconcat items - -unordList :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -unordList opts = toList H.ul opts . toListItems opts - -ordList :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -ordList opts = toList H.ol opts . toListItems opts - -defList :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -defList opts items = toList H.dl opts (items ++ [nl opts]) - --- | Construct table of contents from list of elements. -tableOfContents :: PandocMonad m => WriterOptions -> [Element] -> StateT WriterState m (Maybe Html) -tableOfContents _ [] = return Nothing -tableOfContents opts sects = do - contents <- mapM (elementToListItem opts) sects - let tocList = catMaybes contents - if null tocList - then return Nothing - else Just <$> unordList opts tocList - --- | Convert section number to string -showSecNum :: [Int] -> String -showSecNum = concat . intersperse "." . map show - --- | Converts an Element to a list item for a table of contents, --- retrieving the appropriate identifier from state. -elementToListItem :: PandocMonad m => WriterOptions -> Element -> StateT WriterState m (Maybe Html) --- Don't include the empty headers created in slide shows --- shows when an hrule is used to separate slides without a new title: -elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing -elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) - | lev <= writerTOCDepth opts = do - let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) - let sectnum = if writerNumberSections opts && not (null num) && - "unnumbered" `notElem` classes - then (H.span ! A.class_ "toc-section-number" - $ toHtml $ showSecNum num') >> preEscapedString " " - else mempty - txt <- liftM (sectnum >>) $ inlineListToHtml opts $ walk deNote headerText - subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes - subList <- if null subHeads - then return mempty - else unordList opts subHeads - -- in reveal.js, we need #/apples, not #apples: - slideVariant <- gets stSlideVariant - let revealSlash = ['/' | slideVariant== RevealJsSlides] - return $ Just - $ if null id' - then (H.a $ toHtml txt) >> subList - else (H.a ! A.href (toValue $ "#" ++ revealSlash ++ - writerIdentifierPrefix opts ++ id') - $ toHtml txt) >> subList -elementToListItem _ _ = return Nothing - --- | Convert an Element to Html. -elementToHtml :: PandocMonad m => Int -> WriterOptions -> Element -> StateT WriterState m Html -elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block -elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elements) = do - slideVariant <- gets stSlideVariant - let slide = slideVariant /= NoSlides && level <= slideLevel - let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) - modify $ \st -> st{stSecNum = num'} -- update section number - html5 <- gets stHtml5 - let titleSlide = slide && level < slideLevel - header' <- if title' == [Str "\0"] -- marker for hrule - then return mempty - else do - modify (\st -> st{ stElement = True}) - res <- blockToHtml opts - (Header level (id',classes,keyvals) title') - modify (\st -> st{ stElement = False}) - return res - - let isSec (Sec _ _ _ _ _) = True - isSec (Blk _) = False - let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] - isPause _ = False - let fragmentClass = case slideVariant of - RevealJsSlides -> "fragment" - _ -> "incremental" - let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" - ++ fragmentClass ++ "\">")) : - (xs ++ [Blk (RawBlock (Format "html") "</div>")]) - innerContents <- mapM (elementToHtml slideLevel opts) - $ if titleSlide - -- title slides have no content of their own - then filter isSec elements - else case splitBy isPause elements of - [] -> [] - (x:xs) -> x ++ concatMap inDiv xs - let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] - let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ - ["section" | (slide || writerSectionDivs opts) && - not html5 ] ++ - ["level" ++ show level | slide || writerSectionDivs opts ] - ++ classes - let secttag = if html5 - then H5.section - else H.div - let attr = (id',classes',keyvals) - return $ if titleSlide - then (if slideVariant == RevealJsSlides - then H5.section - else id) $ mconcat $ - (addAttrs opts attr $ secttag $ header') : innerContents - else if writerSectionDivs opts || slide - then addAttrs opts attr - $ secttag $ inNl $ header' : innerContents - else mconcat $ intersperse (nl opts) - $ addAttrs opts attr header' : innerContents - --- | Convert list of Note blocks to a footnote <div>. --- Assumes notes are sorted. -footnoteSection :: PandocMonad m - => WriterOptions -> [Html] -> StateT WriterState m Html -footnoteSection opts notes = do - html5 <- gets stHtml5 - slideVariant <- gets stSlideVariant - let hrtag = if html5 then H5.hr else H.hr - let container x = if html5 - then H5.section ! A.class_ "footnotes" $ x - else if slideVariant /= NoSlides - then H.div ! A.class_ "footnotes slide" $ x - else H.div ! A.class_ "footnotes" $ x - return $ - if null notes - then mempty - else nl opts >> (container - $ nl opts >> hrtag >> nl opts >> - H.ol (mconcat notes >> nl opts) >> nl opts) - --- | Parse a mailto link; return Just (name, domain) or Nothing. -parseMailto :: String -> Maybe (String, String) -parseMailto s = do - case break (==':') s of - (xs,':':addr) | map toLower xs == "mailto" -> do - let (name', rest) = span (/='@') addr - let domain = drop 1 rest - return (name', domain) - _ -> fail "not a mailto: URL" - --- | Obfuscate a "mailto:" link. -obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html -obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = - return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (renderHtml -> txt) s = - let meth = writerEmailObfuscation opts - s' = map toLower (take 7 s) ++ drop 7 s - in case parseMailto s' of - (Just (name', domain)) -> - let domain' = substitute "." " dot " domain - at' = obfuscateChar '@' - (linkText, altText) = - if txt == drop 7 s' -- autolink - then ("e", name' ++ " at " ++ domain') - else ("'" ++ obfuscateString txt ++ "'", - txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") - in case meth of - ReferenceObfuscation -> - -- need to use preEscapedString or &'s are escaped to & in URL - return $ - preEscapedString $ "<a href=\"" ++ (obfuscateString s') - ++ "\" class=\"email\">" ++ (obfuscateString txt) ++ "</a>" - JavascriptObfuscation -> - return $ - (H.script ! A.type_ "text/javascript" $ - preEscapedString ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ - obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> - H.noscript (preEscapedString $ obfuscateString altText) - _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth - _ -> return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email - --- | Obfuscate character as entity. -obfuscateChar :: Char -> String -obfuscateChar char = - let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" - --- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . fromEntities - -addAttrs :: WriterOptions -> Attr -> Html -> Html -addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr) - -toAttrs :: [(String, String)] -> [Attribute] -toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs - -attrsToHtml :: WriterOptions -> Attr -> [Attribute] -attrsToHtml opts (id',classes',keyvals) = - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals - -imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute] -imgAttrsToHtml opts attr = - attrsToHtml opts (ident,cls,kvs') ++ - toAttrs (dimensionsToAttrList opts attr) - where - (ident,cls,kvs) = attr - kvs' = filter isNotDim kvs - isNotDim ("width", _) = False - isNotDim ("height", _) = False - isNotDim _ = True - -dimensionsToAttrList :: WriterOptions -> Attr -> [(String, String)] -dimensionsToAttrList opts attr = (go Width) ++ (go Height) - where - go dir = case (dimension dir attr) of - (Just (Percent a)) -> [("style", show dir ++ ":" ++ show (Percent a))] - (Just dim) -> [(show dir, showInPixel opts dim)] - _ -> [] - - -imageExts :: [String] -imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", - "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm", - "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff", - "wbmp", "xbm", "xpm", "xwd" ] - -treatAsImage :: FilePath -> Bool -treatAsImage fp = - let path = case uriPath `fmap` parseURIReference fp of - Nothing -> fp - Just up -> up - ext = map toLower $ drop 1 $ takeExtension path - in null ext || ext `elem` imageExts - --- | Convert Pandoc block element to HTML. -blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html -blockToHtml _ Null = return mempty -blockToHtml opts (Plain lst) = inlineListToHtml opts lst --- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = do - img <- inlineToHtml opts (Image attr txt (s,tit)) - html5 <- gets stHtml5 - let tocapt = if html5 - then H5.figcaption - else H.p ! A.class_ "caption" - capt <- if null txt - then return mempty - else tocapt `fmap` inlineListToHtml opts txt - return $ if html5 - then H5.figure $ mconcat - [nl opts, img, capt, nl opts] - else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, nl opts, capt, nl opts] -blockToHtml opts (Para lst) - | isEmptyRaw lst = return mempty - | otherwise = do - contents <- inlineListToHtml opts lst - return $ H.p contents - where - isEmptyRaw [RawInline f _] = f /= (Format "html") - isEmptyRaw _ = False -blockToHtml opts (LineBlock lns) = - if writerWrapText opts == WrapNone - then blockToHtml opts $ linesToPara lns - else do - let lf = preEscapedString "\n" - htmlLines <- mconcat . intersperse lf <$> mapM (inlineListToHtml opts) lns - return $ H.div ! A.style "white-space: pre-line;" $ htmlLines -blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do - html5 <- gets stHtml5 - let speakerNotes = "notes" `elem` classes - -- we don't want incremental output inside speaker notes, see #1394 - let opts' = if speakerNotes then opts{ writerIncremental = False } else opts - contents <- blockListToHtml opts' bs - let contents' = nl opts >> contents >> nl opts - let (divtag, classes') = if html5 && "section" `elem` classes - then (H5.section, filter (/= "section") classes) - else (H.div, classes) - slideVariant <- gets stSlideVariant - return $ - if speakerNotes - then case slideVariant of - RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' - DZSlides -> (addAttrs opts' attr $ H5.div $ contents') - ! (H5.customAttribute "role" "note") - NoSlides -> addAttrs opts' attr $ H.div $ contents' - _ -> mempty - else addAttrs opts (ident, classes', kvs) $ divtag $ contents' -blockToHtml opts (RawBlock f str) - | f == Format "html" = return $ preEscapedString str - | (f == Format "latex" || f == Format "tex") && - allowsMathEnvironments (writerHTMLMathMethod opts) && - isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str] - | otherwise = do - report $ BlockNotRendered (RawBlock f str) - return mempty -blockToHtml _ (HorizontalRule) = do - html5 <- gets stHtml5 - return $ if html5 then H5.hr else H.hr -blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - let tolhs = isEnabled Ext_literate_haskell opts && - any (\c -> map toLower c == "haskell") classes && - any (\c -> map toLower c == "literate") classes - classes' = if tolhs - then map (\c -> if map toLower c == "haskell" - then "literatehaskell" - else c) classes - else classes - adjCode = if tolhs - then unlines . map ("> " ++) . lines $ rawCode - else rawCode - hlCode = if isJust (writerHighlightStyle opts) - then highlight formatHtmlBlock - (id',classes',keyvals) adjCode - else Nothing - case hlCode of - Nothing -> return $ addAttrs opts (id',classes,keyvals) - $ H.pre $ H.code $ toHtml adjCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (addAttrs opts (id',[],keyvals) h) -blockToHtml opts (BlockQuote blocks) = do - -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - slideVariant <- gets stSlideVariant - if slideVariant /= NoSlides - then let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) - (BulletList lst) - [OrderedList attribs lst] -> - blockToHtml (opts {writerIncremental = inc}) - (OrderedList attribs lst) - [DefinitionList lst] -> - blockToHtml (opts {writerIncremental = inc}) - (DefinitionList lst) - _ -> do contents <- blockListToHtml opts blocks - return $ H.blockquote - $ nl opts >> contents >> nl opts - else do - contents <- blockListToHtml opts blocks - return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level attr@(_,classes,_) lst) = do - contents <- inlineListToHtml opts lst - secnum <- liftM stSecNum get - let contents' = if writerNumberSections opts && not (null secnum) - && "unnumbered" `notElem` classes - then (H.span ! A.class_ "header-section-number" $ toHtml - $ showSecNum secnum) >> strToHtml " " >> contents - else contents - inElement <- gets stElement - return $ (if inElement then id else addAttrs opts attr) - $ case level of - 1 -> H.h1 contents' - 2 -> H.h2 contents' - 3 -> H.h3 contents' - 4 -> H.h4 contents' - 5 -> H.h5 contents' - 6 -> H.h6 contents' - _ -> H.p contents' -blockToHtml opts (BulletList lst) = do - contents <- mapM (blockListToHtml opts) lst - unordList opts contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do - contents <- mapM (blockListToHtml opts) lst - html5 <- gets stHtml5 - let numstyle' = case numstyle of - Example -> "decimal" - _ -> camelCaseToHyphenated $ show numstyle - let attribs = (if startnum /= 1 - then [A.start $ toValue startnum] - else []) ++ - (if numstyle == Example - then [A.class_ "example"] - else []) ++ - (if numstyle /= DefaultStyle - then if html5 - then [A.type_ $ - case numstyle of - Decimal -> "1" - LowerAlpha -> "a" - UpperAlpha -> "A" - LowerRoman -> "i" - UpperRoman -> "I" - _ -> "1"] - else [A.style $ toValue $ "list-style-type: " ++ - numstyle'] - else []) - l <- ordList opts contents - return $ foldl (!) l attribs -blockToHtml opts (DefinitionList lst) = do - contents <- mapM (\(term, defs) -> - do term' <- if null term - then return mempty - else liftM H.dt $ inlineListToHtml opts term - defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . - blockListToHtml opts) defs - return $ mconcat $ nl opts : term' : nl opts : - intersperse (nl opts) defs') lst - defList opts contents -blockToHtml opts (Table capt aligns widths headers rows') = do - captionDoc <- if null capt - then return mempty - else do - cs <- inlineListToHtml opts capt - return $ H.caption cs >> nl opts - html5 <- gets stHtml5 - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let coltags = if all (== 0.0) widths - then mempty - else do - H.colgroup $ do - nl opts - mapM_ (\w -> do - if html5 - then H.col ! A.style (toValue $ "width: " ++ - percent w) - else H.col ! A.width (toValue $ percent w) - nl opts) widths - nl opts - head' <- if all null headers - then return mempty - else do - contents <- tableRowToHtml opts aligns 0 headers - return $ H.thead (nl opts >> contents) >> nl opts - body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $ - zipWithM (tableRowToHtml opts aligns) [1..] rows' - let tbl = H.table $ - nl opts >> captionDoc >> coltags >> head' >> body' >> nl opts - let totalWidth = sum widths - -- When widths of columns are < 100%, we need to set width for the whole - -- table, or some browsers give us skinny columns with lots of space between: - return $ if totalWidth == 0 || totalWidth == 1 - then tbl - else tbl ! A.style (toValue $ "width:" ++ - show (round (totalWidth * 100) :: Int) ++ "%;") - -tableRowToHtml :: PandocMonad m - => WriterOptions - -> [Alignment] - -> Int - -> [[Block]] - -> StateT WriterState m Html -tableRowToHtml opts aligns rownum cols' = do - let mkcell = if rownum == 0 then H.th else H.td - let rowclass = case rownum of - 0 -> "header" - x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToHtml opts mkcell alignment item) - aligns cols' - return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'') - >> nl opts - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "" - -tableItemToHtml :: PandocMonad m - => WriterOptions - -> (Html -> Html) - -> Alignment - -> [Block] - -> StateT WriterState m Html -tableItemToHtml opts tag' align' item = do - contents <- blockListToHtml opts item - html5 <- gets stHtml5 - let alignStr = alignmentToString align' - let attribs = if html5 - then A.style (toValue $ "text-align: " ++ alignStr ++ ";") - else A.align (toValue alignStr) - let tag'' = if null alignStr - then tag' - else tag' ! attribs - return $ (tag'' $ contents) >> nl opts - -toListItems :: WriterOptions -> [Html] -> [Html] -toListItems opts items = map (toListItem opts) items ++ [nl opts] - -toListItem :: WriterOptions -> Html -> Html -toListItem opts item = nl opts >> H.li item - -blockListToHtml :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Html -blockListToHtml opts lst = - fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst - --- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Html -inlineListToHtml opts lst = - mapM (inlineToHtml opts) lst >>= return . mconcat - --- | Annotates a MathML expression with the tex source -annotateMML :: XML.Element -> String -> XML.Element -annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) - where - cs = case elChildren e of - [] -> unode "mrow" () - [x] -> x - xs -> unode "mrow" xs - math childs = XML.Element q as [XML.Elem childs] l - where - (XML.Element q as _ l) = e - annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"] - - --- | Convert Pandoc inline element to HTML. -inlineToHtml :: PandocMonad m - => WriterOptions -> Inline -> StateT WriterState m Html -inlineToHtml opts inline = do - html5 <- gets stHtml5 - case inline of - (Str str) -> return $ strToHtml str - (Space) -> return $ strToHtml " " - (SoftBreak) -> return $ case writerWrapText opts of - WrapNone -> preEscapedString " " - WrapAuto -> preEscapedString " " - WrapPreserve -> preEscapedString "\n" - (LineBreak) -> return $ (if html5 then H5.br else H.br) - <> strToHtml "\n" - (Span (id',classes,kvs) ils) - -> inlineListToHtml opts ils >>= - return . addAttrs opts attr' . H.span - where attr' = (id',classes',kvs') - classes' = filter (`notElem` ["csl-no-emph", - "csl-no-strong", - "csl-no-smallcaps"]) classes - kvs' = if null styles - then kvs - else (("style", concat styles) : kvs) - styles = ["font-style:normal;" - | "csl-no-emph" `elem` classes] - ++ ["font-weight:normal;" - | "csl-no-strong" `elem` classes] - ++ ["font-variant:normal;" - | "csl-no-smallcaps" `elem` classes] - (Emph lst) -> inlineListToHtml opts lst >>= return . H.em - (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong - (Code attr str) -> case hlCode of - Nothing -> return - $ addAttrs opts attr - $ H.code $ strToHtml str - Just h -> do - modify $ \st -> st{ stHighlighting = True } - return $ addAttrs opts (id',[],keyvals) h - where (id',_,keyvals) = attr - hlCode = if isJust (writerHighlightStyle opts) - then highlight formatHtmlInline - attr str - else Nothing - (Strikeout lst) -> inlineListToHtml opts lst >>= - return . H.del - (SmallCaps lst) -> inlineListToHtml opts lst >>= - return . (H.span ! A.style "font-variant: small-caps;") - (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup - (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub - (Quoted quoteType lst) -> - let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (strToHtml "‘", - strToHtml "’") - DoubleQuote -> (strToHtml "“", - strToHtml "”") - in if writerHtmlQTags opts - then do - modify $ \st -> st{ stQuotes = True } - H.q `fmap` inlineListToHtml opts lst - else (\x -> leftQuote >> x >> rightQuote) - `fmap` inlineListToHtml opts lst - (Math t str) -> do - modify (\st -> st {stMath = True}) - let mathClass = toValue $ ("math " :: String) ++ - if t == InlineMath then "inline" else "display" - case writerHTMLMathMethod opts of - LaTeXMathML _ -> - -- putting LaTeXMathML in container with class "LaTeX" prevents - -- non-math elements on the page from being treated as math by - -- the javascript - return $ H.span ! A.class_ "LaTeX" $ - case t of - InlineMath -> toHtml ("$" ++ str ++ "$") - DisplayMath -> toHtml ("$$" ++ str ++ "$$") - JsMath _ -> do - let m = preEscapedString str - return $ case t of - InlineMath -> H.span ! A.class_ mathClass $ m - DisplayMath -> H.div ! A.class_ mathClass $ m - WebTeX url -> do - let imtag = if html5 then H5.img else H.img - let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode str) - ! A.alt (toValue str) - ! A.title (toValue str) - let brtag = if html5 then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag - GladTeX -> - return $ case t of - InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" - DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML -> do - let conf = useShortEmptyTags (const False) - defaultConfigPP - res <- lift $ convertMath writeMathML t str - case res of - Right r -> return $ preEscapedString $ - ppcElement conf (annotateMML r str) - Left il -> (H.span ! A.class_ mathClass) <$> - inlineToHtml opts il - MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ - case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" - KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ - toHtml (case t of - InlineMath -> str - DisplayMath -> "\\displaystyle " ++ str) - PlainMath -> do - x <- lift (texMathToInlines t str) >>= inlineListToHtml opts - let m = H.span ! A.class_ mathClass $ x - let brtag = if html5 then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag - (RawInline f str) - | f == Format "html" -> return $ preEscapedString str - | otherwise -> do - report $ InlineNotRendered inline - return mempty - (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do - linkText <- inlineListToHtml opts txt - lift $ obfuscateLink opts attr linkText s - (Link attr txt (s,tit)) -> do - linkText <- inlineListToHtml opts txt - slideVariant <- gets stSlideVariant - let s' = case s of - '#':xs -> let prefix = if slideVariant == RevealJsSlides - then "/" - else writerIdentifierPrefix opts - in '#' : prefix ++ xs - _ -> s - let link = H.a ! A.href (toValue s') $ linkText - let link' = if txt == [Str (unEscapeString s)] - then link ! A.class_ "uri" - else link - let link'' = addAttrs opts attr link' - return $ if null tit - then link'' - else link'' ! A.title (toValue tit) - (Image attr txt (s,tit)) | treatAsImage s -> do - let alternate' = stringify txt - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - [A.alt $ toValue alternate' | not (null txt)] ++ - imgAttrsToHtml opts attr - let tag = if html5 then H5.img else H.img - return $ foldl (!) tag attributes - -- note: null title included, as in Markdown.pl - (Image attr _ (s,tit)) -> do - let attributes = [A.src $ toValue s] ++ - [A.title $ toValue tit | not (null tit)] ++ - imgAttrsToHtml opts attr - return $ foldl (!) H5.embed attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do - notes <- gets stNotes - let number = (length notes) + 1 - let ref = show number - htmlContents <- blockListToNote opts ref contents - epubVersion <- gets stEPUBVersion - -- push contents onto front of notes - modify $ \st -> st {stNotes = (htmlContents:notes)} - slideVariant <- gets stSlideVariant - let revealSlash = ['/' | slideVariant == RevealJsSlides] - let link = H.a ! A.href (toValue $ "#" ++ - revealSlash ++ - writerIdentifierPrefix opts ++ "fn" ++ ref) - ! A.class_ "footnoteRef" - ! prefixedId opts ("fnref" ++ ref) - $ (if isJust epubVersion - then id - else H.sup) - $ toHtml ref - return $ case epubVersion of - Just EPUB3 -> link ! customAttribute "epub:type" "noteref" - _ -> link - (Cite cits il)-> do contents <- inlineListToHtml opts il - let citationIds = unwords $ map citationId cits - let result = H.span ! A.class_ "citation" $ contents - return $ if html5 - then result ! customAttribute "data-cites" (toValue citationIds) - else result - -blockListToNote :: PandocMonad m => WriterOptions -> String -> [Block] -> StateT WriterState m Html -blockListToNote opts ref blocks = - -- If last block is Para or Plain, include the backlink at the end of - -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [Link nullAttr [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])] - blocks' = if null blocks - then [] - else let lastBlock = last blocks - otherBlocks = init blocks - in case lastBlock of - (Para lst) -> otherBlocks ++ - [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ - [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, - Plain backlink] - in do contents <- blockListToHtml opts blocks' - let noteItem = H.li ! (prefixedId opts ("fn" ++ ref)) $ contents - epubVersion <- gets stEPUBVersion - let noteItem' = case epubVersion of - Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" - _ -> noteItem - return $ nl opts >> noteItem' - --- Javascript snippet to render all KaTeX elements -renderKaTeX :: String -renderKaTeX = unlines [ - "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" - , "for (var i=0; i < mathElements.length; i++)" - , "{" - , " var texText = mathElements[i].firstChild" - , " katex.render(texText.data, mathElements[i])" - , "}}" - ] - -isMathEnvironment :: String -> Bool -isMathEnvironment s = "\\begin{" `isPrefixOf` s && - envName `elem` mathmlenvs - where envName = takeWhile (/= '}') (drop 7 s) - mathmlenvs = [ "align" - , "align*" - , "alignat" - , "alignat*" - , "aligned" - , "alignedat" - , "array" - , "Bmatrix" - , "bmatrix" - , "cases" - , "CD" - , "eqnarray" - , "eqnarray*" - , "equation" - , "equation*" - , "gather" - , "gather*" - , "gathered" - , "matrix" - , "multline" - , "multline*" - , "pmatrix" - , "smallmatrix" - , "split" - , "subarray" - , "Vmatrix" - , "vmatrix" ] - -allowsMathEnvironments :: HTMLMathMethod -> Bool -allowsMathEnvironments (MathJax _) = True -allowsMathEnvironments (MathML) = True -allowsMathEnvironments (WebTeX _) = True -allowsMathEnvironments _ = False diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs deleted file mode 100644 index 945e4a0f1..000000000 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ /dev/null @@ -1,370 +0,0 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} -{- -Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Haddock - Copyright : Copyright (C) 2014 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to haddock markup. - -Haddock: <http://www.haskell.org/haddock/doc/html/> --} -module Text.Pandoc.Writers.Haddock (writeHaddock) where -import Text.Pandoc.Definition -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Data.List ( intersperse, transpose ) -import Text.Pandoc.Pretty -import Control.Monad.State -import Text.Pandoc.Writers.Math (texMathToInlines) -import Network.URI (isURI) -import Data.Default -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - -type Notes = [[Block]] -data WriterState = WriterState { stNotes :: Notes } -instance Default WriterState - where def = WriterState{ stNotes = [] } - --- | Convert Pandoc to Haddock. -writeHaddock :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeHaddock opts document = - evalStateT (pandocToHaddock opts{ - writerWrapText = writerWrapText opts } document) def - --- | Return haddock representation of document. -pandocToHaddock :: PandocMonad m - => WriterOptions -> Pandoc -> StateT WriterState m String -pandocToHaddock opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - body <- blockListToHaddock opts blocks - st <- get - notes' <- notesToHaddock opts (reverse $ stNotes st) - let render' :: Doc -> String - render' = render colwidth - let main = render' $ body <> - (if isEmpty notes' then empty else blankline <> notes') - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToHaddock opts) - (fmap (render colwidth) . inlineListToHaddock opts) - meta - let context = defField "body" main - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Return haddock representation of notes. -notesToHaddock :: PandocMonad m - => WriterOptions -> [[Block]] -> StateT WriterState m Doc -notesToHaddock opts notes = - if null notes - then return empty - else do - contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes - return $ text "#notes#" <> blankline <> contents - --- | Escape special characters for Haddock. -escapeString :: String -> String -escapeString = escapeStringUsing haddockEscapes - where haddockEscapes = backslashEscapes "\\/'`\"@<" - --- | Convert Pandoc block element to haddock. -blockToHaddock :: PandocMonad m - => WriterOptions -- ^ Options - -> Block -- ^ Block element - -> StateT WriterState m Doc -blockToHaddock _ Null = return empty -blockToHaddock opts (Div _ ils) = do - contents <- blockListToHaddock opts ils - return $ contents <> blankline -blockToHaddock opts (Plain inlines) = do - contents <- inlineListToHaddock opts inlines - return $ contents <> cr --- title beginning with fig: indicates figure -blockToHaddock opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToHaddock opts (Para [Image attr alt (src,tit)]) -blockToHaddock opts (Para inlines) = - -- TODO: if it contains linebreaks, we need to use a @...@ block - (<> blankline) `fmap` blockToHaddock opts (Plain inlines) -blockToHaddock opts (LineBlock lns) = - blockToHaddock opts $ linesToPara lns -blockToHaddock _ b@(RawBlock f str) - | f == "haddock" = do - return $ text str <> text "\n" - | otherwise = do - report $ BlockNotRendered b - return empty -blockToHaddock opts HorizontalRule = - return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline -blockToHaddock opts (Header level (ident,_,_) inlines) = do - contents <- inlineListToHaddock opts inlines - let attr' = if null ident - then empty - else cr <> text "#" <> text ident <> text "#" - return $ nowrap (text (replicate level '=') <> space <> contents) - <> attr' <> blankline -blockToHaddock _ (CodeBlock (_,_,_) str) = - return $ prefixed "> " (text str) <> blankline --- Nothing in haddock corresponds to block quotes: -blockToHaddock opts (BlockQuote blocks) = - blockListToHaddock opts blocks --- Haddock doesn't have tables. Use haddock tables in code. -blockToHaddock opts (Table caption aligns widths headers rows) = do - caption' <- inlineListToHaddock opts caption - let caption'' = if null caption - then empty - else blankline <> caption' <> blankline - rawHeaders <- mapM (blockListToHaddock opts) headers - rawRows <- mapM (mapM (blockListToHaddock opts)) rows - let isSimple = all (==0) widths - let isPlainBlock (Plain _) = True - isPlainBlock _ = False - let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) - (nst,tbl) <- case True of - _ | isSimple -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | not hasBlocks -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | otherwise -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline -blockToHaddock opts (BulletList items) = do - contents <- mapM (bulletListItemToHaddock opts) items - return $ cat contents <> blankline -blockToHaddock opts (OrderedList (start,_,delim) items) = do - let attribs = (start, Decimal, delim) - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ - zip markers' items - return $ cat contents <> blankline -blockToHaddock opts (DefinitionList items) = do - contents <- mapM (definitionListItemToHaddock opts) items - return $ cat contents <> blankline - -pandocTable :: PandocMonad m - => WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> StateT WriterState m Doc -pandocTable opts headless aligns widths rawHeaders rawRows = do - let isSimple = all (==0) widths - let alignHeader alignment = case alignment of - AlignLeft -> lblock - AlignCenter -> cblock - AlignRight -> rblock - AlignDefault -> lblock - let numChars = maximum . map offset - let widthsInChars = if isSimple - then map ((+2) . numChars) - $ transpose (rawHeaders : rawRows) - else map - (floor . (fromIntegral (writerColumns opts) *)) - widths - let makeRow = hcat . intersperse (lblock 1 (text " ")) . - (zipWith3 alignHeader aligns widthsInChars) - let rows' = map makeRow rawRows - let head' = makeRow rawHeaders - let maxRowHeight = maximum $ map height (head':rows') - let underline = cat $ intersperse (text " ") $ - map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') - else if headless - then underline - else empty - let head'' = if headless - then empty - else border <> cr <> head' - let body = if maxRowHeight > 1 - then vsep rows' - else vcat rows' - let bottom = if headless - then underline - else border - return $ head'' $$ underline $$ body $$ bottom - -gridTable :: PandocMonad m - => WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> StateT WriterState m Doc -gridTable opts headless _aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if headless - then empty - else head' $$ border '=' - return $ border '-' $$ head'' $$ body $$ border '-' - --- | Convert bullet list item (list of blocks) to haddock -bulletListItemToHaddock :: PandocMonad m - => WriterOptions -> [Block] -> StateT WriterState m Doc -bulletListItemToHaddock opts items = do - contents <- blockListToHaddock opts items - let sps = replicate (writerTabStop opts - 2) ' ' - let start = text ('-' : ' ' : sps) - -- remove trailing blank line if it is a tight list - let contents' = case reverse items of - (BulletList xs:_) | isTightList xs -> - chomp contents <> cr - (OrderedList _ xs:_) | isTightList xs -> - chomp contents <> cr - _ -> contents - return $ hang (writerTabStop opts) start $ contents' <> cr - --- | Convert ordered list item (a list of blocks) to haddock -orderedListItemToHaddock :: PandocMonad m - => WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> StateT WriterState m Doc -orderedListItemToHaddock opts marker items = do - contents <- blockListToHaddock opts items - let sps = case length marker - writerTabStop opts of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - let start = text marker <> sps - return $ hang (writerTabStop opts) start $ contents <> cr - --- | Convert definition list item (label, list of blocks) to haddock -definitionListItemToHaddock :: PandocMonad m - => WriterOptions - -> ([Inline],[[Block]]) - -> StateT WriterState m Doc -definitionListItemToHaddock opts (label, defs) = do - labelText <- inlineListToHaddock opts label - defs' <- mapM (mapM (blockToHaddock opts)) defs - let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs' - return $ nowrap (brackets labelText) <> cr <> contents <> cr - --- | Convert list of Pandoc block elements to haddock -blockListToHaddock :: PandocMonad m - => WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> StateT WriterState m Doc -blockListToHaddock opts blocks = - mapM (blockToHaddock opts) blocks >>= return . cat - --- | Convert list of Pandoc inline elements to haddock. -inlineListToHaddock :: PandocMonad m - => WriterOptions -> [Inline] -> StateT WriterState m Doc -inlineListToHaddock opts lst = - mapM (inlineToHaddock opts) lst >>= return . cat - --- | Convert Pandoc inline element to haddock. -inlineToHaddock :: PandocMonad m - => WriterOptions -> Inline -> StateT WriterState m Doc -inlineToHaddock opts (Span (ident,_,_) ils) = do - contents <- inlineListToHaddock opts ils - if not (null ident) && null ils - then return $ "#" <> text ident <> "#" - else return contents -inlineToHaddock opts (Emph lst) = do - contents <- inlineListToHaddock opts lst - return $ "/" <> contents <> "/" -inlineToHaddock opts (Strong lst) = do - contents <- inlineListToHaddock opts lst - return $ "__" <> contents <> "__" -inlineToHaddock opts (Strikeout lst) = do - contents <- inlineListToHaddock opts lst - -- not supported in haddock, but we fake it: - return $ "~~" <> contents <> "~~" --- not supported in haddock: -inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst --- not supported in haddock: -inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst --- not supported in haddock: -inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst -inlineToHaddock opts (Quoted SingleQuote lst) = do - contents <- inlineListToHaddock opts lst - return $ "‘" <> contents <> "’" -inlineToHaddock opts (Quoted DoubleQuote lst) = do - contents <- inlineListToHaddock opts lst - return $ "“" <> contents <> "”" -inlineToHaddock _ (Code _ str) = - return $ "@" <> text (escapeString str) <> "@" -inlineToHaddock _ (Str str) = do - return $ text $ escapeString str -inlineToHaddock opts (Math mt str) = do - let adjust x = case mt of - DisplayMath -> cr <> x <> cr - InlineMath -> x - adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts) -inlineToHaddock _ il@(RawInline f str) - | f == "haddock" = return $ text str - | otherwise = do - report $ InlineNotRendered il - return empty --- no line break in haddock (see above on CodeBlock) -inlineToHaddock _ LineBreak = return cr -inlineToHaddock opts SoftBreak = - case writerWrapText opts of - WrapAuto -> return space - WrapNone -> return space - WrapPreserve -> return cr -inlineToHaddock _ Space = return space -inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst -inlineToHaddock _ (Link _ txt (src, _)) = do - let linktext = text $ escapeString $ stringify txt - let useAuto = isURI src && - case txt of - [Str s] | escapeURI s == src -> True - _ -> False - return $ nowrap $ "<" <> text src <> - (if useAuto then empty else space <> linktext) <> ">" -inlineToHaddock opts (Image attr alternate (source, tit)) = do - linkhaddock <- inlineToHaddock opts (Link attr alternate (source, tit)) - return $ "<" <> linkhaddock <> ">" --- haddock doesn't have notes, but we can fake it: -inlineToHaddock opts (Note contents) = do - modify (\st -> st{ stNotes = contents : stNotes st }) - st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) - return $ "<#notes [" <> ref <> "]>" diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs deleted file mode 100644 index efec17d26..000000000 --- a/src/Text/Pandoc/Writers/ICML.hs +++ /dev/null @@ -1,584 +0,0 @@ -{-# LANGUAGE OverloadedStrings, FlexibleContexts, ScopedTypeVariables #-} - -{- | - Module : Text.Pandoc.Writers.ICML - Copyright : Copyright (C) 2013-2016 github.com/mb21 - License : GNU GPL, version 2 or above - - Stability : alpha - -Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format -which is a subset of the zipped IDML format for which the documentation is -available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf -InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated -into InDesign with File -> Place. --} -module Text.Pandoc.Writers.ICML (writeICML) where -import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.XML -import Text.Pandoc.Writers.Math (texMathToInlines) -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared (linesToPara, splitBy) -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Data.List (isPrefixOf, isInfixOf, stripPrefix, intersperse) -import Data.Text as Text (breakOnAll, pack) -import Control.Monad.State -import Control.Monad.Except (runExceptT) -import Network.URI (isURI) -import qualified Data.Set as Set -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging -import qualified Text.Pandoc.Class as P - -type Style = [String] -type Hyperlink = [(Int, String)] - -data WriterState = WriterState{ - blockStyles :: Set.Set String - , inlineStyles :: Set.Set String - , links :: Hyperlink - , listDepth :: Int - , maxListDepth :: Int - } - -type WS m = StateT WriterState m - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ - blockStyles = Set.empty - , inlineStyles = Set.empty - , links = [] - , listDepth = 1 - , maxListDepth = 0 - } - --- inline names (appear in InDesign's character styles pane) -emphName :: String -strongName :: String -strikeoutName :: String -superscriptName :: String -subscriptName :: String -smallCapsName :: String -codeName :: String -linkName :: String -emphName = "Italic" -strongName = "Bold" -strikeoutName = "Strikeout" -superscriptName = "Superscript" -subscriptName = "Subscript" -smallCapsName = "SmallCaps" -codeName = "Code" -linkName = "Link" - --- block element names (appear in InDesign's paragraph styles pane) -paragraphName :: String -figureName :: String -imgCaptionName :: String -codeBlockName :: String -blockQuoteName :: String -orderedListName :: String -bulletListName :: String -defListTermName :: String -defListDefName :: String -headerName :: String -tableName :: String -tableHeaderName :: String -tableCaptionName :: String -alignLeftName :: String -alignRightName :: String -alignCenterName :: String -firstListItemName :: String -beginsWithName :: String -lowerRomanName :: String -upperRomanName :: String -lowerAlphaName :: String -upperAlphaName :: String -subListParName :: String -footnoteName :: String -citeName :: String -paragraphName = "Paragraph" -figureName = "Figure" -imgCaptionName = "Caption" -codeBlockName = "CodeBlock" -blockQuoteName = "Blockquote" -orderedListName = "NumList" -bulletListName = "BulList" -defListTermName = "DefListTerm" -defListDefName = "DefListDef" -headerName = "Header" -tableName = "TablePar" -tableHeaderName = "TableHeader" -tableCaptionName = "TableCaption" -alignLeftName = "LeftAlign" -alignRightName = "RightAlign" -alignCenterName = "CenterAlign" -firstListItemName = "first" -beginsWithName = "beginsWith-" -lowerRomanName = "lowerRoman" -upperRomanName = "upperRoman" -lowerAlphaName = "lowerAlpha" -upperAlphaName = "upperAlpha" -subListParName = "subParagraph" -footnoteName = "Footnote" -citeName = "Cite" - --- | Convert Pandoc document to string in ICML format. -writeICML :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeICML opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - render' = render colwidth - renderMeta f s = liftM (render' . fst) $ runStateT (f opts [] s) defaultWriterState - metadata <- metaToJSON opts - (renderMeta blocksToICML) - (renderMeta inlinesToICML) - meta - (doc, st) <- runStateT (blocksToICML opts [] blocks) defaultWriterState - let main = render' doc - context = defField "body" main - $ defField "charStyles" (render' $ charStylesToDoc st) - $ defField "parStyles" (render' $ parStylesToDoc st) - $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) - $ metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - --- | Auxilary functions for parStylesToDoc and charStylesToDoc. -contains :: String -> (String, (String, String)) -> [(String, String)] -contains s rule = - if isInfixOf (fst rule) s - then [snd rule] - else [] - --- | The monospaced font to use as default. -monospacedFont :: Doc -monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New" - --- | How much to indent blockquotes etc. -defaultIndent :: Int -defaultIndent = 20 - --- | How much to indent numbered lists before the number. -defaultListIndent :: Int -defaultListIndent = 10 - --- other constants -lineSeparator :: String -lineSeparator = "
" - --- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. -parStylesToDoc :: WriterState -> Doc -parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st - where - makeStyle s = - let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) - attrs = concat $ map (contains s) $ [ - (defListTermName, ("BulletsAndNumberingListType", "BulletList")) - , (defListTermName, ("FontStyle", "Bold")) - , (tableHeaderName, ("FontStyle", "Bold")) - , (alignLeftName, ("Justification", "LeftAlign")) - , (alignRightName, ("Justification", "RightAlign")) - , (alignCenterName, ("Justification", "CenterAlign")) - , (headerName++"1", ("PointSize", "36")) - , (headerName++"2", ("PointSize", "30")) - , (headerName++"3", ("PointSize", "24")) - , (headerName++"4", ("PointSize", "18")) - , (headerName++"5", ("PointSize", "14")) - ] - -- what is the most nested list type, if any? - (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s - where - findList [] = (False, False) - findList (x:xs) | x == bulletListName = (True, False) - | x == orderedListName = (False, True) - | otherwise = findList xs - nBuls = countSubStrs bulletListName s - nOrds = countSubStrs orderedListName s - attrs' = numbering ++ listType ++ indent ++ attrs - where - numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] - | otherwise = [] - listType | isOrderedList && (not $ isInfixOf subListParName s) - = [("BulletsAndNumberingListType", "NumberedList")] - | isBulletList && (not $ isInfixOf subListParName s) - = [("BulletsAndNumberingListType", "BulletList")] - | otherwise = [] - indent = [("LeftIndent", show indt)] - where - nBlockQuotes = countSubStrs blockQuoteName s - nDefLists = countSubStrs defListDefName s - indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) - props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) - where - font = if isInfixOf codeBlockName s - then monospacedFont - else empty - basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font - tabList = if isBulletList - then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")] - $ vcat [ - inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign" - , inTags False "AlignmentCharacter" [("type","string")] $ text "." - , selfClosingTag "Leader" [("type","string")] - , inTags False "Position" [("type","unit")] $ text - $ show $ defaultListIndent * (nBuls + nOrds) - ] - else empty - makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) - numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." - | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." - | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." - | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." - | otherwise = empty - in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props - --- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. -charStylesToDoc :: WriterState -> Doc -charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st - where - makeStyle s = - let attrs = concat $ map (contains s) [ - (strikeoutName, ("StrikeThru", "true")) - , (superscriptName, ("Position", "Superscript")) - , (subscriptName, ("Position", "Subscript")) - , (smallCapsName, ("Capitalization", "SmallCaps")) - ] - attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs - | isInfixOf strongName s = ("FontStyle", "Bold") : attrs - | isInfixOf emphName s = ("FontStyle", "Italic") : attrs - | otherwise = attrs - props = inTags True "Properties" [] $ - inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font - where - font = - if isInfixOf codeName s - then monospacedFont - else empty - in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props - --- | Escape colon characters as %3a -escapeColons :: String -> String -escapeColons (x:xs) - | x == ':' = "%3a" ++ escapeColons xs - | otherwise = x : escapeColons xs -escapeColons [] = [] - --- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. -hyperlinksToDoc :: Hyperlink -> Doc -hyperlinksToDoc [] = empty -hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs - where - hyp (ident, url) = hdest $$ hlink - where - hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++(escapeColons url)), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 - hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), - ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] - $ inTags True "Properties" [] - $ inTags False "BorderColor" [("type","enumeration")] (text "Black") - $$ (inTags False "Destination" [("type","object")] - $ text $ "HyperlinkURLDestination/"++(escapeColons (escapeStringForXML url))) -- HyperlinkURLDestination with more than one colon crashes CS6 - - --- | Convert a list of Pandoc blocks to ICML. -blocksToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc -blocksToICML opts style lst = do - docs <- mapM (blockToICML opts style) lst - return $ intersperseBrs docs - --- | Convert a Pandoc block element to ICML. -blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m Doc -blockToICML opts style (Plain lst) = parStyle opts style lst --- title beginning with fig: indicates that the image is a figure -blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do - figure <- parStyle opts (figureName:style) img - caption <- parStyle opts (imgCaptionName:style) txt - return $ intersperseBrs [figure, caption] -blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst -blockToICML opts style (LineBlock lns) = - blockToICML opts style $ linesToPara lns -blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML _ _ b@(RawBlock f str) - | f == Format "icml" = return $ text str - | otherwise = do - report $ BlockNotRendered b - return empty -blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks -blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst -blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst -blockToICML opts style (DefinitionList lst) = intersperseBrs `fmap` mapM (definitionListItemToICML opts style) lst -blockToICML opts style (Header lvl _ lst) = - let stl = (headerName ++ show lvl):style - in parStyle opts stl lst -blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead -blockToICML opts style (Table caption aligns widths headers rows) = - let style' = tableName : style - noHeader = all null headers - nrHeaders = if noHeader - then "0" - else "1" - nrRows = length rows - nrCols = if null rows - then 0 - else length $ head rows - rowsToICML [] _ = return empty - rowsToICML (col:rest) rowNr = - liftM2 ($$) (colsToICML col aligns rowNr (0::Int)) $ rowsToICML rest (rowNr+1) - colsToICML [] _ _ _ = return empty - colsToICML _ [] _ _ = return empty - colsToICML (cell:rest) (alig:restAligns) rowNr colNr = do - let stl = if rowNr == 0 && not noHeader - then tableHeaderName:style' - else style' - stl' | alig == AlignLeft = alignLeftName : stl - | alig == AlignRight = alignRightName : stl - | alig == AlignCenter = alignCenterName : stl - | otherwise = stl - c <- blocksToICML opts stl' cell - let cl = return $ inTags True "Cell" - [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c - liftM2 ($$) cl $ colsToICML rest restAligns rowNr (colNr+1) - in do - let tabl = if noHeader - then rows - else headers:rows - cells <- rowsToICML tabl (0::Int) - let colWidths w = if w > 0 - then [("SingleColumnWidth",show $ 500 * w)] - else [] - let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) - let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths - let tableDoc = return $ inTags True "Table" [ - ("AppliedTableStyle","TableStyle/Table") - , ("HeaderRowCount", nrHeaders) - , ("BodyRowCount", show nrRows) - , ("ColumnCount", show nrCols) - ] (colDescs $$ cells) - liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption -blockToICML opts style (Div _ lst) = blocksToICML opts style lst -blockToICML _ _ Null = return empty - --- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m Doc -listItemsToICML _ _ _ _ [] = return empty -listItemsToICML opts listType style attribs (first:rest) = do - st <- get - put st{ listDepth = 1 + listDepth st} - let stl = listType:style - let f = listItemToICML opts stl True attribs first - let r = map (listItemToICML opts stl False attribs) rest - docs <- sequence $ f:r - s <- get - let maxD = max (maxListDepth s) (listDepth s) - put s{ listDepth = 1, maxListDepth = maxD } - return $ intersperseBrs docs - --- | Convert a list of blocks to ICML list items. -listItemToICML :: PandocMonad m => WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS m Doc -listItemToICML opts style isFirst attribs item = - let makeNumbStart (Just (beginsWith, numbStl, _)) = - let doN DefaultStyle = [] - doN LowerRoman = [lowerRomanName] - doN UpperRoman = [upperRomanName] - doN LowerAlpha = [lowerAlphaName] - doN UpperAlpha = [upperAlphaName] - doN _ = [] - bw = if beginsWith > 1 - then [beginsWithName ++ show beginsWith] - else [] - in doN numbStl ++ bw - makeNumbStart Nothing = [] - stl = if isFirst - then firstListItemName:style - else style - stl' = makeNumbStart attribs ++ stl - in if length item > 1 - then do - let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst - insertTab block = blockToICML opts style block - f <- blockToICML opts stl' $ head item - r <- mapM insertTab $ tail item - return $ intersperseBrs (f : r) - else blocksToICML opts stl' item - -definitionListItemToICML :: PandocMonad m => WriterOptions -> Style -> ([Inline],[[Block]]) -> WS m Doc -definitionListItemToICML opts style (term,defs) = do - term' <- parStyle opts (defListTermName:style) term - defs' <- mapM (blocksToICML opts (defListDefName:style)) defs - return $ intersperseBrs $ (term' : defs') - - --- | Convert a list of inline elements to ICML. -inlinesToICML :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc -inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) - --- | Convert an inline element to ICML. -inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m Doc -inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str -inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst -inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst -inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst -inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst -inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst -inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst -inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] -inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] -inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst -inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str -inlineToICML _ style Space = charStyle style space -inlineToICML opts style SoftBreak = - case writerWrapText opts of - WrapAuto -> charStyle style space - WrapNone -> charStyle style space - WrapPreserve -> charStyle style cr -inlineToICML _ style LineBreak = charStyle style $ text lineSeparator -inlineToICML opts style (Math mt str) = - lift (texMathToInlines mt str) >>= - (fmap cat . mapM (inlineToICML opts style)) -inlineToICML _ _ il@(RawInline f str) - | f == Format "icml" = return $ text str - | otherwise = do - report $ InlineNotRendered il - return empty -inlineToICML opts style (Link _ lst (url, title)) = do - content <- inlinesToICML opts (linkName:style) lst - state $ \st -> - let ident = if null $ links st - then 1::Int - else 1 + (fst $ head $ links st) - newst = st{ links = (ident, url):(links st) } - cont = inTags True "HyperlinkTextSource" - [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content - in (cont, newst) -inlineToICML opts style (Image attr _ target) = imageICML opts style attr target -inlineToICML opts style (Note lst) = footnoteToICML opts style lst -inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst - --- | Convert a list of block elements to an ICML footnote. -footnoteToICML :: PandocMonad m => WriterOptions -> Style -> [Block] -> WS m Doc -footnoteToICML opts style lst = - let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls - insertTab block = blockToICML opts (footnoteName:style) block - in do - contents <- mapM insertTab lst - let number = inTags True "ParagraphStyleRange" [] $ - inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>" - return $ inTags True "CharacterStyleRange" - [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")] - $ inTags True "Footnote" [] $ number $$ intersperseBrs contents - --- | Auxiliary function to merge Space elements into the adjacent Strs. -mergeSpaces :: [Inline] -> [Inline] -mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x = - mergeSpaces $ Str(s++" "++s') : xs -mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs -mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs -mergeSpaces (x:xs) = x : (mergeSpaces xs) -mergeSpaces [] = [] - -isSp :: Inline -> Bool -isSp Space = True -isSp SoftBreak = True -isSp _ = False - --- | Intersperse line breaks -intersperseBrs :: [Doc] -> Doc -intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isEmpty) - --- | Wrap a list of inline elements in an ICML Paragraph Style -parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m Doc -parStyle opts style lst = - let slipIn x y = if null y - then x - else x ++ " > " ++ y - stlStr = foldr slipIn [] $ reverse style - stl = if null stlStr - then "" - else "ParagraphStyle/" ++ stlStr - attrs = ("AppliedParagraphStyle", stl) - attrs' = if firstListItemName `elem` style - then let ats = attrs : [("NumberingContinue", "false")] - begins = filter (isPrefixOf beginsWithName) style - in if null begins - then ats - else let i = maybe "" id $ stripPrefix beginsWithName $ head begins - in ("NumberingStartAt", i) : ats - else [attrs] - in do - content <- inlinesToICML opts [] lst - let cont = inTags True "ParagraphStyleRange" attrs' content - state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) - --- | Wrap a Doc in an ICML Character Style. -charStyle :: PandocMonad m => Style -> Doc -> WS m Doc -charStyle style content = - let (stlStr, attrs) = styleToStrAttr style - doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content - in do - state $ \st -> - let styles = if null stlStr - then st - else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } - in (doc, styles) - --- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. -styleToStrAttr :: Style -> (String, [(String, String)]) -styleToStrAttr style = - let stlStr = unwords $ Set.toAscList $ Set.fromList style - stl = if null style - then "$ID/NormalCharacterStyle" - else "CharacterStyle/" ++ stlStr - attrs = [("AppliedCharacterStyle", stl)] - in (stlStr, attrs) - --- | Assemble an ICML Image. -imageICML :: PandocMonad m => WriterOptions -> Style -> Attr -> Target -> WS m Doc -imageICML opts style attr (src, _) = do - res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src - imgS <- case res of - Left (_ :: PandocError) -> do - report $ CouldNotFetchResource src "" - return def - Right (img, _) -> do - case imageSize img of - Right size -> return size - Left msg -> do - report $ CouldNotDetermineImageSize src msg - return def - let (ow, oh) = sizeInPoints imgS - (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS - hw = showFl $ ow / 2 - hh = showFl $ oh / 2 - scale = showFl (imgWidth / ow) ++ " 0 0 " ++ showFl (imgHeight / oh) - src' = if isURI src then src else "file:" ++ src - (stlStr, attrs) = styleToStrAttr style - props = inTags True "Properties" [] $ inTags True "PathGeometry" [] - $ inTags True "GeometryPathType" [("PathOpen","false")] - $ inTags True "PathPointArray" [] - $ vcat [ - selfClosingTag "PathPointType" [("Anchor", "-"++hw++" -"++hh), - ("LeftDirection", "-"++hw++" -"++hh), ("RightDirection", "-"++hw++" -"++hh)] - , selfClosingTag "PathPointType" [("Anchor", "-"++hw++" "++hh), - ("LeftDirection", "-"++hw++" "++hh), ("RightDirection", "-"++hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" "++hh), - ("LeftDirection", hw++" "++hh), ("RightDirection", hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" -"++hh), - ("LeftDirection", hw++" -"++hh), ("RightDirection", hw++" -"++hh)] - ] - image = inTags True "Image" - [("Self","ue6"), ("ItemTransform", scale++" -"++hw++" -"++hh)] - $ vcat [ - inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" - , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] - ] - doc = inTags True "CharacterStyleRange" attrs - $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), - ("ItemTransform", scale++" "++hw++" -"++hh)] - $ (props $$ image) - state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs deleted file mode 100644 index ac2b5d758..000000000 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ /dev/null @@ -1,1388 +0,0 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, - PatternGuards #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into LaTeX. --} -module Text.Pandoc.Writers.LaTeX ( - writeLaTeX - , writeBeamer - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates -import Text.Pandoc.Logging -import Text.Printf ( printf ) -import Network.URI ( isURI, unEscapeString ) -import Data.Aeson (object, (.=), FromJSON) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, - nub, nubBy, foldl' ) -import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, - ord, isAlphaNum ) -import Data.Maybe ( fromMaybe, isJust, catMaybes ) -import qualified Data.Text as T -import Control.Applicative ((<|>)) -import Control.Monad.State -import qualified Text.Parsec as P -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Text.Pandoc.Slides -import Text.Pandoc.Highlighting (highlight, styleToLaTeX, - formatLaTeXInline, formatLaTeXBlock, - toListingsLanguage) -import Text.Pandoc.Class (PandocMonad, report) - -data WriterState = - WriterState { stInNote :: Bool -- true if we're in a note - , stInQuote :: Bool -- true if in a blockquote - , stInMinipage :: Bool -- true if in minipage - , stInHeading :: Bool -- true if in a section heading - , stNotes :: [Doc] -- notes in a minipage - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter - , stVerbInNote :: Bool -- true if document has verbatim text in note - , stTable :: Bool -- true if document has a table - , stStrikeout :: Bool -- true if document has strikeout - , stUrl :: Bool -- true if document has visible URL link - , stGraphics :: Bool -- true if document contains images - , stLHS :: Bool -- true if document has literate haskell code - , stBook :: Bool -- true if document uses book or memoir class - , stCsquotes :: Bool -- true if document uses csquotes - , stHighlighting :: Bool -- true if document has highlighted code - , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit - , stInternalLinks :: [String] -- list of internal link targets - , stUsesEuro :: Bool -- true if euro symbol used - , stBeamer :: Bool -- produce beamer - } - -startingState :: WriterOptions -> WriterState -startingState options = WriterState { - stInNote = False - , stInQuote = False - , stInMinipage = False - , stInHeading = False - , stNotes = [] - , stOLLevel = 1 - , stOptions = options - , stVerbInNote = False - , stTable = False - , stStrikeout = False - , stUrl = False - , stGraphics = False - , stLHS = False - , stBook = (case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False) - , stCsquotes = False - , stHighlighting = False - , stIncremental = writerIncremental options - , stInternalLinks = [] - , stUsesEuro = False - , stBeamer = False } - --- | Convert Pandoc to LaTeX. -writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeLaTeX options document = - evalStateT (pandocToLaTeX options document) $ - startingState options - --- | Convert Pandoc to LaTeX Beamer. -writeBeamer :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeBeamer options document = - evalStateT (pandocToLaTeX options document) $ - (startingState options){ stBeamer = True } - -type LW m = StateT WriterState m - -pandocToLaTeX :: PandocMonad m - => WriterOptions -> Pandoc -> LW m String -pandocToLaTeX options (Pandoc meta blocks) = do - -- Strip off final 'references' header if --natbib or --biblatex - let method = writerCiteMethod options - let blocks' = if method == Biblatex || method == Natbib - then case reverse blocks of - (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks - else blocks - -- see if there are internal links - let isInternalLink (Link _ _ ('#':xs,_)) = [xs] - isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } - let template = maybe "" id $ writerTemplate options - -- set stBook depending on documentclass - let colwidth = if writerWrapText options == WrapAuto - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta - let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - let documentClass = case P.parse pDocumentClass "template" template of - Right r -> r - Left _ -> "" - case lookup "documentclass" (writerVariables options) `mplus` - fmap stringify (lookupMeta "documentclass" meta) of - Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} - | otherwise -> return () - Nothing | documentClass `elem` bookClasses - -> modify $ \s -> s{stBook = True} - | otherwise -> return () - -- check for \usepackage...{csquotes}; if present, we'll use - -- \enquote{...} for smart quotes: - let headerIncludesField :: FromJSON a => Maybe a - headerIncludesField = getField "header-includes" metadata - let headerIncludes = fromMaybe [] $ mplus - (fmap return headerIncludesField) - headerIncludesField - when (any (isInfixOf "{csquotes}") (template : headerIncludes)) $ - modify $ \s -> s{stCsquotes = True} - let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then - (blocks', []) - else case last blocks' of - Header 1 _ il -> (init blocks', il) - _ -> (blocks', []) - beamer <- gets stBeamer - blocks''' <- if beamer - then toSlides blocks'' - else return blocks'' - body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' - (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth $ vsep body - st <- get - titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta - authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let docLangs = nub $ query (extract "lang") blocks - let hasStringValue x = isJust (getField x metadata :: Maybe String) - let geometryFromMargins = intercalate [','] $ catMaybes $ - map (\(x,y) -> - ((x ++ "=") ++) <$> getField y metadata) - [("lmargin","margin-left") - ,("rmargin","margin-right") - ,("tmargin","margin-top") - ,("bmargin","margin-bottom") - ] - let context = defField "toc" (writerTableOfContents options) $ - defField "toc-depth" (show (writerTOCDepth options - - if stBook st - then 1 - else 0)) $ - defField "body" main $ - defField "title-meta" titleMeta $ - defField "author-meta" (intercalate "; " authorsMeta) $ - defField "documentclass" (if beamer - then ("beamer" :: String) - else if stBook st - then "book" - else "article") $ - defField "verbatim-in-note" (stVerbInNote st) $ - defField "tables" (stTable st) $ - defField "strikeout" (stStrikeout st) $ - defField "url" (stUrl st) $ - defField "numbersections" (writerNumberSections options) $ - defField "lhs" (stLHS st) $ - defField "graphics" (stGraphics st) $ - defField "book-class" (stBook st) $ - defField "euro" (stUsesEuro st) $ - defField "listings" (writerListings options || stLHS st) $ - defField "beamer" beamer $ - (if stHighlighting st - then case writerHighlightStyle options of - Just sty -> - defField "highlighting-macros" - (styleToLaTeX sty) - Nothing -> id - else id) $ - (case writerCiteMethod options of - Natbib -> defField "biblio-title" biblioTitle . - defField "natbib" True - Biblatex -> defField "biblio-title" biblioTitle . - defField "biblatex" True - _ -> id) $ - -- set lang to something so polyglossia/babel is included - defField "lang" (if null docLangs then ""::String else "en") $ - defField "otherlangs" docLangs $ - defField "colorlinks" (any hasStringValue - ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ - defField "dir" (if (null $ query (extract "dir") blocks) - then ""::String - else "ltr") $ - defField "section-titles" True $ - defField "geometry" geometryFromMargins $ - metadata - let toPolyObj lang = object [ "name" .= T.pack name - , "options" .= T.pack opts ] - where - (name, opts) = toPolyglossia lang - let lang = maybe [] (splitBy (=='-')) $ getField "lang" context - otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context - let context' = - defField "babel-lang" (toBabel lang) - $ defField "babel-otherlangs" (map toBabel otherlangs) - $ defField "babel-newcommands" (concatMap (\(poly, babel) -> - -- \textspanish and \textgalician are already used by babel - -- save them as \oritext... and let babel use that - if poly `elem` ["spanish", "galician"] - then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" - ++ poly ++ "}}\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ poly ++ "}{##2}}}\n" - else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ "}[2][]{\\begin{otherlanguage}{" - ++ babel ++ "}}{\\end{otherlanguage}}\n" - ) - -- eliminate duplicates that have same polyglossia name - $ nubBy (\a b -> fst a == fst b) - -- find polyglossia and babel names of languages used in the document - $ map (\l -> - let lng = splitBy (=='-') l - in (fst $ toPolyglossia lng, toBabel lng) - ) - docLangs ) - $ defField "polyglossia-lang" (toPolyObj lang) - $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) - $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of - Just "rtl" -> True - _ -> False) - $ context - return $ case writerTemplate options of - Nothing -> main - Just tpl -> renderTemplate' tpl context' - --- | Convert Elements to LaTeX -elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc -elementToLaTeX _ (Blk block) = blockToLaTeX block -elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do - modify $ \s -> s{stInHeading = True} - header' <- sectionHeader ("unnumbered" `elem` classes) id' level title' - modify $ \s -> s{stInHeading = False} - innerContents <- mapM (elementToLaTeX opts) elements - return $ vsep (header' : innerContents) - -data StringContext = TextString - | URLString - | CodeString - deriving (Eq) - --- escape things as needed for LaTeX -stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String -stringToLaTeX _ [] = return "" -stringToLaTeX ctx (x:xs) = do - opts <- gets stOptions - rest <- stringToLaTeX ctx xs - let ligatures = isEnabled Ext_smart opts && ctx == TextString - let isUrl = ctx == URLString - when (x == '€') $ - modify $ \st -> st{ stUsesEuro = True } - return $ - case x of - '€' -> "\\euro{}" ++ rest - '{' -> "\\{" ++ rest - '}' -> "\\}" ++ rest - '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest - '$' | not isUrl -> "\\$" ++ rest - '%' -> "\\%" ++ rest - '&' -> "\\&" ++ rest - '_' | not isUrl -> "\\_" ++ rest - '#' -> "\\#" ++ rest - '-' | not isUrl -> case xs of - -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-\\/" ++ rest - _ -> '-' : rest - '~' | not isUrl -> "\\textasciitilde{}" ++ rest - '^' -> "\\^{}" ++ rest - '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows - | otherwise -> "\\textbackslash{}" ++ rest - '|' | not isUrl -> "\\textbar{}" ++ rest - '<' -> "\\textless{}" ++ rest - '>' -> "\\textgreater{}" ++ rest - '[' -> "{[}" ++ rest -- to avoid interpretation as - ']' -> "{]}" ++ rest -- optional arguments - '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest - '\160' -> "~" ++ rest - '\x202F' -> "\\," ++ rest - '\x2026' -> "\\ldots{}" ++ rest - '\x2018' | ligatures -> "`" ++ rest - '\x2019' | ligatures -> "'" ++ rest - '\x201C' | ligatures -> "``" ++ rest - '\x201D' | ligatures -> "''" ++ rest - '\x2014' | ligatures -> "---" ++ rest - '\x2013' | ligatures -> "--" ++ rest - _ -> x : rest - -toLabel :: PandocMonad m => String -> LW m String -toLabel z = go `fmap` stringToLaTeX URLString z - where go [] = "" - go (x:xs) - | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x ("_-+=:;." :: String) = x:go xs - | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs - --- | Puts contents into LaTeX command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '\\' <> text cmd <> braces contents - -toSlides :: PandocMonad m => [Block] -> LW m [Block] -toSlides bs = do - opts <- gets stOptions - let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts - let bs' = prepSlides slideLevel bs - concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') - -elementToBeamer :: PandocMonad m => Int -> Element -> LW m [Block] -elementToBeamer _slideLevel (Blk b) = return [b] -elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) - | lvl > slideLevel = do - bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ Para ( RawInline "latex" "\\begin{block}{" - : tit ++ [RawInline "latex" "}"] ) - : bs ++ [RawBlock "latex" "\\end{block}"] - | lvl < slideLevel = do - bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,kvs) tit) : bs - | otherwise = do -- lvl == slideLevel - -- note: [fragile] is required or verbatim breaks - let hasCodeBlock (CodeBlock _ _) = [True] - hasCodeBlock _ = [] - let hasCode (Code _ _) = [True] - hasCode _ = [] - let fragile = "fragile" `elem` classes || - not (null $ query hasCodeBlock elts ++ query hasCode elts) - let frameoptions = ["allowdisplaybreaks", "allowframebreaks", - "b", "c", "t", "environment", - "label", "plain", "shrink", "standout"] - let optionslist = ["fragile" | fragile] ++ - [k | k <- classes, k `elem` frameoptions] ++ - [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] - let options = if null optionslist - then "" - else "[" ++ intercalate "," optionslist ++ "]" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : - if tit == [Str "\0"] -- marker for hrule - then [] - else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] - let slideEnd = RawBlock "latex" "\\end{frame}" - -- now carve up slide into blocks if there are sections inside - bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ slideStart : bs ++ [slideEnd] - -isListBlock :: Block -> Bool -isListBlock (BulletList _) = True -isListBlock (OrderedList _ _) = True -isListBlock (DefinitionList _) = True -isListBlock _ = False - -isLineBreakOrSpace :: Inline -> Bool -isLineBreakOrSpace LineBreak = True -isLineBreakOrSpace SoftBreak = True -isLineBreakOrSpace Space = True -isLineBreakOrSpace _ = False - --- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: PandocMonad m - => Block -- ^ Block to convert - -> LW m Doc -blockToLaTeX Null = return empty -blockToLaTeX (Div (identifier,classes,kvs) bs) = do - beamer <- gets stBeamer - ref <- toLabel identifier - let linkAnchor = if null identifier - then empty - else "\\hypertarget" <> braces (text ref) <> - braces empty - let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir - let wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> id - wrapLang txt = case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if null o - then "" - else brackets $ text o - in inCmd "begin" (text l) <> ops - $$ blankline <> txt <> blankline - $$ inCmd "end" (text l) - Nothing -> txt - wrapNotes txt = if beamer && "notes" `elem` classes - then "\\note" <> braces txt -- speaker notes - else linkAnchor $$ txt - fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs -blockToLaTeX (Plain lst) = - inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst --- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do - inNote <- gets stInNote - modify $ \st -> st{ stInMinipage = True, stNotes = [] } - capt <- inlineListToLaTeX txt - notes <- gets stNotes - modify $ \st -> st{ stInMinipage = False, stNotes = [] } - - -- We can't have footnotes in the list of figures, so remove them: - captForLof <- if null notes - then return empty - else brackets <$> inlineListToLaTeX (walk deNote txt) - img <- inlineToLaTeX (Image attr txt (src,tit)) - let footnotes = notesToLaTeX notes - lab <- labelFor ident - let caption = "\\caption" <> captForLof <> braces capt <> lab - figure <- hypertarget ident (cr <> - "\\begin{figure}" $$ "\\centering" $$ img $$ - caption $$ "\\end{figure}" <> cr) - return $ if inNote - -- can't have figures in notes - then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" - else figure $$ footnotes --- . . . indicates pause in beamer slides -blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do - beamer <- gets stBeamer - if beamer - then blockToLaTeX (RawBlock "latex" "\\pause") - else inlineListToLaTeX [Str ".",Space,Str ".",Space,Str "."] -blockToLaTeX (Para lst) = - inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -blockToLaTeX (LineBlock lns) = do - blockToLaTeX $ linesToPara lns -blockToLaTeX (BlockQuote lst) = do - beamer <- gets stBeamer - case lst of - [b] | beamer && isListBlock b -> do - oldIncremental <- gets stIncremental - modify $ \s -> s{ stIncremental = not oldIncremental } - result <- blockToLaTeX b - modify $ \s -> s{ stIncremental = oldIncremental } - return result - _ -> do - oldInQuote <- gets stInQuote - modify (\s -> s{stInQuote = True}) - contents <- blockListToLaTeX lst - modify (\s -> s{stInQuote = oldInQuote}) - return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" -blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do - opts <- gets stOptions - ref <- toLabel identifier - let linkAnchor = if null identifier - then empty - else "\\hypertarget" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) - let lhsCodeBlock = do - modify $ \s -> s{ stLHS = True } - return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ - "\\end{code}") $$ cr - let rawCodeBlock = do - st <- get - env <- if stInNote st - then modify (\s -> s{ stVerbInNote = True }) >> - return "Verbatim" - else return "verbatim" - return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ - text str $$ text ("\\end{" ++ env ++ "}")) <> cr - let listingsCodeBlock = do - st <- get - let params = if writerListings (stOptions st) - then (case getListingsLanguage classes of - Just l -> [ "language=" ++ mbBraced l ] - Nothing -> []) ++ - [ "numbers=left" | "numberLines" `elem` classes - || "number" `elem` classes - || "number-lines" `elem` classes ] ++ - [ (if key == "startFrom" - then "firstnumber" - else key) ++ "=" ++ mbBraced attr | - (key,attr) <- keyvalAttr ] ++ - (if identifier == "" - then [] - else [ "label=" ++ ref ]) - - else [] - printParams - | null params = empty - | otherwise = brackets $ hcat (intersperse ", " - (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ - "\\end{lstlisting}") $$ cr - let highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ linkAnchor $$ text (T.unpack h)) - case () of - _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && - "literate" `elem` classes -> lhsCodeBlock - | writerListings opts -> listingsCodeBlock - | not (null classes) && isJust (writerHighlightStyle opts) - -> highlightedCodeBlock - | otherwise -> rawCodeBlock -blockToLaTeX b@(RawBlock f x) - | f == Format "latex" || f == Format "tex" - = return $ text x - | otherwise = do - report $ BlockNotRendered b - return empty -blockToLaTeX (BulletList []) = return empty -- otherwise latex error -blockToLaTeX (BulletList lst) = do - incremental <- gets stIncremental - beamer <- gets stBeamer - let inc = if beamer && incremental then "[<+->]" else "" - items <- mapM listItemToLaTeX lst - let spacing = if isTightList lst - then text "\\tightlist" - else empty - return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$ - "\\end{itemize}" -blockToLaTeX (OrderedList _ []) = return empty -- otherwise latex error -blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do - st <- get - let inc = if stIncremental st then "[<+->]" else "" - let oldlevel = stOLLevel st - put $ st {stOLLevel = oldlevel + 1} - items <- mapM listItemToLaTeX lst - modify (\s -> s {stOLLevel = oldlevel}) - let tostyle x = case numstyle of - Decimal -> "\\arabic" <> braces x - UpperRoman -> "\\Roman" <> braces x - LowerRoman -> "\\roman" <> braces x - UpperAlpha -> "\\Alph" <> braces x - LowerAlpha -> "\\alph" <> braces x - Example -> "\\arabic" <> braces x - DefaultStyle -> "\\arabic" <> braces x - let todelim x = case numdelim of - OneParen -> x <> ")" - TwoParens -> parens x - Period -> x <> "." - _ -> x <> "." - let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel) - let stylecommand = if numstyle == DefaultStyle && numdelim == DefaultDelim - then empty - else "\\def" <> "\\label" <> enum <> - braces (todelim $ tostyle enum) - let resetcounter = if start == 1 || oldlevel > 4 - then empty - else "\\setcounter" <> braces enum <> - braces (text $ show $ start - 1) - let spacing = if isTightList lst - then text "\\tightlist" - else empty - return $ text ("\\begin{enumerate}" ++ inc) - $$ stylecommand - $$ resetcounter - $$ spacing - $$ vcat items - $$ "\\end{enumerate}" -blockToLaTeX (DefinitionList []) = return empty -blockToLaTeX (DefinitionList lst) = do - incremental <- gets stIncremental - let inc = if incremental then "[<+->]" else "" - items <- mapM defListItemToLaTeX lst - let spacing = if all isTightList (map snd lst) - then text "\\tightlist" - else empty - return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ - "\\end{description}" -blockToLaTeX HorizontalRule = return $ - "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" -blockToLaTeX (Header level (id',classes,_) lst) = do - modify $ \s -> s{stInHeading = True} - hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst - modify $ \s -> s{stInHeading = False} - return hdr -blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else do - contents <- (tableRowToLaTeX True aligns widths) heads - return ("\\toprule" $$ contents $$ "\\midrule") - let endhead = if all null heads - then empty - else text "\\endhead" - let endfirsthead = if all null heads - then empty - else text "\\endfirsthead" - captionText <- inlineListToLaTeX caption - let capt = if isEmpty captionText - then empty - else text "\\caption" <> braces captionText <> "\\tabularnewline" - $$ headers - $$ endfirsthead - rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let colDescriptors = text $ concat $ map toColDescriptor aligns - modify $ \s -> s{ stTable = True } - return $ "\\begin{longtable}[]" <> - braces ("@{}" <> colDescriptors <> "@{}") - -- the @{} removes extra space at beginning and end - $$ capt - $$ (if all null heads then "\\toprule" else empty) - $$ headers - $$ endhead - $$ vcat rows' - $$ "\\bottomrule" - $$ "\\end{longtable}" - -toColDescriptor :: Alignment -> String -toColDescriptor align = - case align of - AlignLeft -> "l" - AlignRight -> "r" - AlignCenter -> "c" - AlignDefault -> "l" - -blockListToLaTeX :: PandocMonad m => [Block] -> LW m Doc -blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst - -tableRowToLaTeX :: PandocMonad m - => Bool - -> [Alignment] - -> [Double] - -> [[Block]] - -> LW m Doc -tableRowToLaTeX header aligns widths cols = do - -- scale factor compensates for extra space between columns - -- so the whole table isn't larger than columnwidth - let scaleFactor = 0.97 ** fromIntegral (length aligns) - let isSimple [Plain _] = True - isSimple [Para _] = True - isSimple [] = True - isSimple _ = False - -- simple tables have to have simple cells: - let widths' = if not (all isSimple cols) - then replicate (length aligns) - (0.97 / fromIntegral (length aligns)) - else map (scaleFactor *) widths - cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols - return $ hsep (intersperse "&" cells) <> "\\tabularnewline" - --- For simple latex tables (without minipages or parboxes), --- we need to go to some lengths to get line breaks working: --- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. -fixLineBreaks :: Block -> Block -fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils -fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils -fixLineBreaks x = x - -fixLineBreaks' :: [Inline] -> [Inline] -fixLineBreaks' ils = case splitBy (== LineBreak) ils of - [] -> [] - [xs] -> xs - chunks -> RawInline "tex" "\\vtop{" : - concatMap tohbox chunks ++ - [RawInline "tex" "}"] - where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ - [RawInline "tex" "}"] - --- We also change display math to inline math, since display --- math breaks in simple tables. -displayMathToInline :: Inline -> Inline -displayMathToInline (Math DisplayMath x) = Math InlineMath x -displayMathToInline x = x - -tableCellToLaTeX :: PandocMonad m => Bool -> (Double, Alignment, [Block]) - -> LW m Doc -tableCellToLaTeX _ (0, _, blocks) = - blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks -tableCellToLaTeX header (width, align, blocks) = do - modify $ \st -> st{ stInMinipage = True, stNotes = [] } - cellContents <- blockListToLaTeX blocks - notes <- gets stNotes - modify $ \st -> st{ stInMinipage = False, stNotes = [] } - let valign = text $ if header then "[b]" else "[t]" - let halign = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" - return $ ("\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> "\\strut" <> cr <> cellContents <> "\\strut" <> cr) <> - "\\end{minipage}") $$ - notesToLaTeX notes - -notesToLaTeX :: [Doc] -> Doc -notesToLaTeX [] = empty -notesToLaTeX ns = (case length ns of - n | n > 1 -> "\\addtocounter" <> - braces "footnote" <> - braces (text $ show $ 1 - n) - | otherwise -> empty) - $$ - vcat (intersperse - ("\\addtocounter" <> braces "footnote" <> braces "1") - $ map (\x -> "\\footnotetext" <> braces x) - $ reverse ns) - -listItemToLaTeX :: PandocMonad m => [Block] -> LW m Doc -listItemToLaTeX lst - -- we need to put some text before a header if it's the first - -- element in an item. This will look ugly in LaTeX regardless, but - -- this will keep the typesetter from throwing an error. - | ((Header _ _ _) :_) <- lst = - blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2) - | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) . - (nest 2) - -defListItemToLaTeX :: PandocMonad m => ([Inline], [[Block]]) -> LW m Doc -defListItemToLaTeX (term, defs) = do - term' <- inlineListToLaTeX term - -- put braces around term if it contains an internal link, - -- since otherwise we get bad bracket interactions: \item[\hyperref[..] - let isInternalLink (Link _ _ ('#':_,_)) = True - isInternalLink _ = False - let term'' = if any isInternalLink term - then braces term' - else term' - def' <- liftM vsep $ mapM blockListToLaTeX defs - return $ case defs of - (((Header _ _ _) : _) : _) -> - "\\item" <> brackets term'' <> " ~ " $$ def' - _ -> - "\\item" <> brackets term'' $$ def' - --- | Craft the section header, inserting the secton reference, if supplied. -sectionHeader :: PandocMonad m - => Bool -- True for unnumbered - -> [Char] - -> Int - -> [Inline] - -> LW m Doc -sectionHeader unnumbered ident level lst = do - txt <- inlineListToLaTeX lst - plain <- stringToLaTeX TextString $ concatMap stringify lst - let removeInvalidInline (Note _) = [] - removeInvalidInline (Span (id', _, _) _) | not (null id') = [] - removeInvalidInline (Image _ _ _) = [] - removeInvalidInline x = [x] - let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst - txtNoNotes <- inlineListToLaTeX lstNoNotes - -- footnotes in sections don't work (except for starred variants) - -- unless you specify an optional argument: - -- \section[mysec]{mysec\footnote{blah}} - optional <- if unnumbered || lstNoNotes == lst || lstNoNotes == [] - then return empty - else do - return $ brackets txtNoNotes - let contents = if render Nothing txt == plain - then braces txt - else braces (text "\\texorpdfstring" - <> braces txt - <> braces (text plain)) - book <- gets stBook - opts <- gets stOptions - let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault - then TopLevelChapter - else writerTopLevelDivision opts - beamer <- gets stBeamer - let level' = if beamer && - topLevelDivision `elem` [TopLevelPart, TopLevelChapter] - -- beamer has parts but no chapters - then if level == 1 then -1 else level - 1 - else case topLevelDivision of - TopLevelPart -> level - 2 - TopLevelChapter -> level - 1 - TopLevelSection -> level - TopLevelDefault -> level - let sectionType = case level' of - -1 -> "part" - 0 -> "chapter" - 1 -> "section" - 2 -> "subsection" - 3 -> "subsubsection" - 4 -> "paragraph" - 5 -> "subparagraph" - _ -> "" - inQuote <- gets stInQuote - let prefix = if inQuote && level' >= 4 - then text "\\mbox{}%" - -- needed for \paragraph, \subparagraph in quote environment - -- see http://tex.stackexchange.com/questions/169830/ - else empty - lab <- labelFor ident - let star = if unnumbered && level' < 4 then text "*" else empty - let stuffing = star <> optional <> contents - stuffing' <- hypertarget ident $ text ('\\':sectionType) <> stuffing <> lab - return $ if level' > 5 - then txt - else prefix $$ stuffing' - $$ if unnumbered - then "\\addcontentsline{toc}" <> - braces (text sectionType) <> - braces txtNoNotes - else empty - -hypertarget :: PandocMonad m => String -> Doc -> LW m Doc -hypertarget ident x = do - ref <- text `fmap` toLabel ident - internalLinks <- gets stInternalLinks - return $ - if ident `elem` internalLinks - then text "\\hypertarget" - <> braces ref - <> braces x - else x - -labelFor :: PandocMonad m => String -> LW m Doc -labelFor "" = return empty -labelFor ident = do - ref <- text `fmap` toLabel ident - return $ text "\\label" <> braces ref - --- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: PandocMonad m - => [Inline] -- ^ Inlines to convert - -> LW m Doc -inlineListToLaTeX lst = - mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) - >>= return . hcat - -- nonbreaking spaces (~) in LaTeX don't work after line breaks, - -- so we turn nbsps after hard breaks to \hspace commands. - -- this is mostly used in verse. - where fixLineInitialSpaces [] = [] - fixLineInitialSpaces (LineBreak : Str s@('\160':_) : xs) = - LineBreak : fixNbsps s ++ fixLineInitialSpaces xs - fixLineInitialSpaces (x:xs) = x : fixLineInitialSpaces xs - fixNbsps s = let (ys,zs) = span (=='\160') s - in replicate (length ys) hspace ++ [Str zs] - hspace = RawInline "latex" "\\hspace*{0.333em}" - -- linebreaks after blank lines cause problems: - fixBreaks [] = [] - fixBreaks ys@(LineBreak : LineBreak : _) = - case span (== LineBreak) ys of - (lbs, rest) -> RawInline "latex" - ("\\\\[" ++ show (length lbs) ++ - "\\baselineskip]") : fixBreaks rest - fixBreaks (y:ys) = y : fixBreaks ys - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted _ = False - --- | Convert inline element to LaTeX -inlineToLaTeX :: PandocMonad m - => Inline -- ^ Inline to convert - -> LW m Doc -inlineToLaTeX (Span (id',classes,kvs) ils) = do - ref <- toLabel id' - let linkAnchor = if null id' - then empty - else "\\protect\\hypertarget" <> braces (text ref) <> - braces empty - let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ - ["textnormal" | "csl-no-strong" `elem` classes || - "csl-no-smallcaps" `elem` classes] ++ - ["RL" | ("dir", "rtl") `elem` kvs] ++ - ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lookup "lang" kvs of - Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng - ops = if null o then "" else ("[" ++ o ++ "]") - in ["text" ++ l ++ ops] - Nothing -> []) - contents <- inlineListToLaTeX ils - return $ linkAnchor <> - if null cmds - then braces contents - else foldr inCmd contents cmds -inlineToLaTeX (Emph lst) = - inlineListToLaTeX lst >>= return . inCmd "emph" -inlineToLaTeX (Strong lst) = - inlineListToLaTeX lst >>= return . inCmd "textbf" -inlineToLaTeX (Strikeout lst) = do - -- we need to protect VERB in an mbox or we get an error - -- see #1294 - contents <- inlineListToLaTeX $ protectCode lst - modify $ \s -> s{ stStrikeout = True } - return $ inCmd "sout" contents -inlineToLaTeX (Superscript lst) = - inlineListToLaTeX lst >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do - inlineListToLaTeX lst >>= return . inCmd "textsubscript" -inlineToLaTeX (SmallCaps lst) = - inlineListToLaTeX lst >>= return . inCmd "textsc" -inlineToLaTeX (Cite cits lst) = do - st <- get - let opts = stOptions st - case writerCiteMethod opts of - Natbib -> citationsToNatbib cits - Biblatex -> citationsToBiblatex cits - _ -> inlineListToLaTeX lst - -inlineToLaTeX (Code (_,classes,_) str) = do - opts <- gets stOptions - inHeading <- gets stInHeading - case () of - _ | writerListings opts && not inHeading -> listingsCode - | isJust (writerHighlightStyle opts) && not (null classes) - -> highlightCode - | otherwise -> rawCode - where listingsCode = do - let listingsopt = case getListingsLanguage classes of - Just l -> "[language=" ++ mbBraced l ++ "]" - Nothing -> "" - inNote <- gets stInNote - when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = case "!\"&'()*,-./:;?@_" \\ str of - (c:_) -> c - [] -> '!' - return $ text $ "\\lstinline" ++ listingsopt ++ [chr] ++ str ++ [chr] - highlightCode = do - case highlight formatLaTeXInline ("",classes,[]) str of - Nothing -> rawCode - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (text (T.unpack h)) - rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) - $ stringToLaTeX CodeString str - where - escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) -inlineToLaTeX (Quoted qt lst) = do - contents <- inlineListToLaTeX lst - csquotes <- liftM stCsquotes get - opts <- gets stOptions - if csquotes - then return $ "\\enquote" <> braces contents - else do - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then "\\," - else empty - let inner = s1 <> contents <> s2 - return $ case qt of - DoubleQuote -> - if isEnabled Ext_smart opts - then text "``" <> inner <> text "''" - else char '\x201C' <> inner <> char '\x201D' - SingleQuote -> - if isEnabled Ext_smart opts - then char '`' <> inner <> char '\'' - else char '\x2018' <> inner <> char '\x2019' -inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str -inlineToLaTeX (Math InlineMath str) = - return $ "\\(" <> text str <> "\\)" -inlineToLaTeX (Math DisplayMath str) = - return $ "\\[" <> text str <> "\\]" -inlineToLaTeX il@(RawInline f str) - | f == Format "latex" || f == Format "tex" - = return $ text str - | otherwise = do - report $ InlineNotRendered il - return empty -inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr -inlineToLaTeX SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - case wrapText of - WrapAuto -> return space - WrapNone -> return space - WrapPreserve -> return cr -inlineToLaTeX Space = return space -inlineToLaTeX (Link _ txt ('#':ident, _)) = do - contents <- inlineListToLaTeX txt - lab <- toLabel ident - return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents -inlineToLaTeX (Link _ txt (src, _)) = - case txt of - [Str x] | escapeURI x == src -> -- autolink - do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString (escapeURI src) - return $ text $ "\\url{" ++ src' ++ "}" - [Str x] | Just rest <- stripPrefix "mailto:" src, - escapeURI x == rest -> -- email autolink - do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString (escapeURI src) - contents <- inlineListToLaTeX txt - return $ "\\href" <> braces (text src') <> - braces ("\\nolinkurl" <> braces contents) - _ -> do contents <- inlineListToLaTeX txt - src' <- stringToLaTeX URLString (escapeURI src) - return $ text ("\\href{" ++ src' ++ "}{") <> - contents <> char '}' -inlineToLaTeX (Image attr _ (source, _)) = do - modify $ \s -> s{ stGraphics = True } - opts <- gets stOptions - let showDim dir = let d = text (show dir) <> "=" - in case (dimension dir attr) of - Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] - Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] - Just dim -> - [d <> text (show dim)] - Nothing -> - [] - dimList = showDim Width ++ showDim Height - dims = if null dimList - then empty - else brackets $ cat (intersperse "," dimList) - source' = if isURI source - then source - else unEscapeString source - source'' <- stringToLaTeX URLString source' - inHeading <- gets stInHeading - return $ - (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> - dims <> braces (text source'') -inlineToLaTeX (Note contents) = do - inMinipage <- gets stInMinipage - modify (\s -> s{stInNote = True}) - contents' <- blockListToLaTeX contents - modify (\s -> s {stInNote = False}) - let optnl = case reverse contents of - (CodeBlock _ _ : _) -> cr - _ -> empty - let noteContents = nest 2 contents' <> optnl - beamer <- gets stBeamer - -- in beamer slides, display footnote from current overlay forward - let beamerMark = if beamer - then text "<.->" - else empty - modify $ \st -> st{ stNotes = noteContents : stNotes st } - return $ - if inMinipage - then "\\footnotemark{}" - -- note: a \n before } needed when note ends with a Verbatim environment - else "\\footnote" <> beamerMark <> braces noteContents - -protectCode :: [Inline] -> [Inline] -protectCode [] = [] -protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs -protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs - where ltx = RawInline (Format "latex") -protectCode (x : xs) = x : protectCode xs - -citationsToNatbib :: PandocMonad m => [Citation] -> LW m Doc -citationsToNatbib (one:[]) - = citeCommand c p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = one - c = case m of - AuthorInText -> "citet" - SuppressAuthor -> "citeyearpar" - NormalCitation -> "citep" - -citationsToNatbib cits - | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits - = citeCommand "citep" p s ks - where - noPrefix = all (null . citationPrefix) - noSuffix = all (null . citationSuffix) - ismode m = all (((==) m) . citationMode) - p = citationPrefix $ head $ cits - s = citationSuffix $ last $ cits - ks = intercalate ", " $ map citationId cits - -citationsToNatbib (c:cs) | citationMode c == AuthorInText = do - author <- citeCommand "citeauthor" [] [] (citationId c) - cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs) - return $ author <+> cits - -citationsToNatbib cits = do - cits' <- mapM convertOne cits - return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" - where - combineTwo a b | isEmpty a = b - | otherwise = a <> text "; " <> b - convertOne Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = case m of - AuthorInText -> citeCommand "citealt" p s k - SuppressAuthor -> citeCommand "citeyear" p s k - NormalCitation -> citeCommand "citealp" p s k - -citeCommand :: PandocMonad m - => String -> [Inline] -> [Inline] -> String -> LW m Doc -citeCommand c p s k = do - args <- citeArguments p s k - return $ text ("\\" ++ c) <> args - -citeArguments :: PandocMonad m - => [Inline] -> [Inline] -> String -> LW m Doc -citeArguments p s k = do - let s' = case s of - (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r - (Str (x:xs) : r) | isPunctuation x -> Str xs : r - _ -> s - pdoc <- inlineListToLaTeX p - sdoc <- inlineListToLaTeX s' - let optargs = case (isEmpty pdoc, isEmpty sdoc) of - (True, True ) -> empty - (True, False) -> brackets sdoc - (_ , _ ) -> brackets pdoc <> brackets sdoc - return $ optargs <> braces (text k) - -citationsToBiblatex :: PandocMonad m => [Citation] -> LW m Doc -citationsToBiblatex (one:[]) - = citeCommand cmd p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } = one - cmd = case m of - SuppressAuthor -> "autocite*" - AuthorInText -> "textcite" - NormalCitation -> "autocite" - -citationsToBiblatex (c:cs) = do - args <- mapM convertOne (c:cs) - return $ text cmd <> foldl' (<>) empty args - where - cmd = case citationMode c of - SuppressAuthor -> "\\autocites*" - AuthorInText -> "\\textcites" - NormalCitation -> "\\autocites" - convertOne Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - } - = citeArguments p s k - -citationsToBiblatex _ = return empty - --- Determine listings language from list of class attributes. -getListingsLanguage :: [String] -> Maybe String -getListingsLanguage [] = Nothing -getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs - -mbBraced :: String -> String -mbBraced x = if not (all isAlphaNum x) - then "{" <> x <> "}" - else x - --- Extract a key from divs and spans -extract :: String -> Block -> [String] -extract key (Div attr _) = lookKey key attr -extract key (Plain ils) = concatMap (extractInline key) ils -extract key (Para ils) = concatMap (extractInline key) ils -extract key (Header _ _ ils) = concatMap (extractInline key) ils -extract _ _ = [] - --- Extract a key from spans -extractInline :: String -> Inline -> [String] -extractInline key (Span attr _) = lookKey key attr -extractInline _ _ = [] - --- Look up a key in an attribute and give a list of its values -lookKey :: String -> Attr -> [String] -lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs - --- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: String -> (String, String) -toPolyglossiaEnv l = - case toPolyglossia $ (splitBy (=='-')) l of - ("arabic", o) -> ("Arabic", o) - x -> x - --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Polyglossia (language, options) tuple --- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: [String] -> (String, String) -toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") -toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") -toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") -toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") -toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") -toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") -toPolyglossia ("de":"1901":_) = ("german", "spelling=old") -toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") -toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") -toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") -toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") -toPolyglossia ("de":_) = ("german", "") -toPolyglossia ("dsb":_) = ("lsorbian", "") -toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") -toPolyglossia ("en":"AU":_) = ("english", "variant=australian") -toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") -toPolyglossia ("en":"GB":_) = ("english", "variant=british") -toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") -toPolyglossia ("en":"UK":_) = ("english", "variant=british") -toPolyglossia ("en":"US":_) = ("english", "variant=american") -toPolyglossia ("grc":_) = ("greek", "variant=ancient") -toPolyglossia ("hsb":_) = ("usorbian", "") -toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic") -toPolyglossia ("sl":_) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") - --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Babel language string. --- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf --- List of supported languages (slightly outdated): --- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: [String] -> String -toBabel ("de":"1901":_) = "german" -toBabel ("de":"AT":"1901":_) = "austrian" -toBabel ("de":"AT":_) = "naustrian" -toBabel ("de":"CH":"1901":_) = "swissgerman" -toBabel ("de":"CH":_) = "nswissgerman" -toBabel ("de":_) = "ngerman" -toBabel ("dsb":_) = "lowersorbian" -toBabel ("el":"polyton":_) = "polutonikogreek" -toBabel ("en":"AU":_) = "australian" -toBabel ("en":"CA":_) = "canadian" -toBabel ("en":"GB":_) = "british" -toBabel ("en":"NZ":_) = "newzealand" -toBabel ("en":"UK":_) = "british" -toBabel ("en":"US":_) = "american" -toBabel ("fr":"CA":_) = "canadien" -toBabel ("fra":"aca":_) = "acadian" -toBabel ("grc":_) = "polutonikogreek" -toBabel ("hsb":_) = "uppersorbian" -toBabel ("la":"x":"classic":_) = "classiclatin" -toBabel ("sl":_) = "slovene" -toBabel x = commonFromBcp47 x - --- Takes a list of the constituents of a BCP 47 language code --- and converts it to a string shared by Babel and Polyglossia. --- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: [String] -> String -commonFromBcp47 [] = "" -commonFromBcp47 ("pt":"BR":_) = "brazil" --- Note: documentation says "brazilian" works too, but it doesn't seem to work --- on some systems. See #2953. -commonFromBcp47 ("sr":"Cyrl":_) = "serbianc" -commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin" -commonFromBcp47 x = fromIso $ head x - where - fromIso "af" = "afrikaans" - fromIso "am" = "amharic" - fromIso "ar" = "arabic" - fromIso "as" = "assamese" - fromIso "ast" = "asturian" - fromIso "bg" = "bulgarian" - fromIso "bn" = "bengali" - fromIso "bo" = "tibetan" - fromIso "br" = "breton" - fromIso "ca" = "catalan" - fromIso "cy" = "welsh" - fromIso "cs" = "czech" - fromIso "cop" = "coptic" - fromIso "da" = "danish" - fromIso "dv" = "divehi" - fromIso "el" = "greek" - fromIso "en" = "english" - fromIso "eo" = "esperanto" - fromIso "es" = "spanish" - fromIso "et" = "estonian" - fromIso "eu" = "basque" - fromIso "fa" = "farsi" - fromIso "fi" = "finnish" - fromIso "fr" = "french" - fromIso "fur" = "friulan" - fromIso "ga" = "irish" - fromIso "gd" = "scottish" - fromIso "gez" = "ethiopic" - fromIso "gl" = "galician" - fromIso "he" = "hebrew" - fromIso "hi" = "hindi" - fromIso "hr" = "croatian" - fromIso "hu" = "magyar" - fromIso "hy" = "armenian" - fromIso "ia" = "interlingua" - fromIso "id" = "indonesian" - fromIso "ie" = "interlingua" - fromIso "is" = "icelandic" - fromIso "it" = "italian" - fromIso "jp" = "japanese" - fromIso "km" = "khmer" - fromIso "kmr" = "kurmanji" - fromIso "kn" = "kannada" - fromIso "ko" = "korean" - fromIso "la" = "latin" - fromIso "lo" = "lao" - fromIso "lt" = "lithuanian" - fromIso "lv" = "latvian" - fromIso "ml" = "malayalam" - fromIso "mn" = "mongolian" - fromIso "mr" = "marathi" - fromIso "nb" = "norsk" - fromIso "nl" = "dutch" - fromIso "nn" = "nynorsk" - fromIso "no" = "norsk" - fromIso "nqo" = "nko" - fromIso "oc" = "occitan" - fromIso "pa" = "panjabi" - fromIso "pl" = "polish" - fromIso "pms" = "piedmontese" - fromIso "pt" = "portuguese" - fromIso "rm" = "romansh" - fromIso "ro" = "romanian" - fromIso "ru" = "russian" - fromIso "sa" = "sanskrit" - fromIso "se" = "samin" - fromIso "sk" = "slovak" - fromIso "sq" = "albanian" - fromIso "sr" = "serbian" - fromIso "sv" = "swedish" - fromIso "syr" = "syriac" - fromIso "ta" = "tamil" - fromIso "te" = "telugu" - fromIso "th" = "thai" - fromIso "ti" = "ethiopic" - fromIso "tk" = "turkmen" - fromIso "tr" = "turkish" - fromIso "uk" = "ukrainian" - fromIso "ur" = "urdu" - fromIso "vi" = "vietnamese" - fromIso _ = "" - -pDocumentOptions :: P.Parsec String () [String] -pDocumentOptions = do - P.char '[' - opts <- P.sepBy - (P.many $ P.spaces *> P.noneOf (" ,]" :: String) <* P.spaces) - (P.char ',') - P.char ']' - return opts - -pDocumentClass :: P.Parsec String () String -pDocumentClass = - do P.skipMany (P.satisfy (/='\\')) - P.string "\\documentclass" - classOptions <- pDocumentOptions <|> return [] - if ("article" :: String) `elem` classOptions - then return "article" - else do P.skipMany (P.satisfy (/='{')) - P.char '{' - P.manyTill P.letter (P.char '}') diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs deleted file mode 100644 index f33acef32..000000000 --- a/src/Text/Pandoc/Writers/Man.hs +++ /dev/null @@ -1,381 +0,0 @@ -{- -Copyright (C) 2007-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to groff man page format. - --} -module Text.Pandoc.Writers.Man ( writeMan) where -import Text.Pandoc.Definition -import Text.Pandoc.Templates -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Writers.Math -import Text.Printf ( printf ) -import Data.List ( stripPrefix, intersperse, intercalate ) -import Data.Maybe (fromMaybe) -import Text.Pandoc.Pretty -import Text.Pandoc.Builder (deleteMeta) -import Control.Monad.State -import Text.Pandoc.Error -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - -type Notes = [[Block]] -data WriterState = WriterState { stNotes :: Notes - , stHasTables :: Bool } - --- | Convert Pandoc to Man. -writeMan :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeMan opts document = evalStateT (pandocToMan opts document) (WriterState [] False) - --- | Return groff man representation of document. -pandocToMan :: PandocMonad m => WriterOptions -> Pandoc -> StateT WriterState m String -pandocToMan opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - let render' = render colwidth - titleText <- inlineListToMan opts $ docTitle meta - let title' = render' titleText - let setFieldsFromTitle = - case break (== ' ') title' of - (cmdName, rest) -> case break (=='(') cmdName of - (xs, '(':ys) | not (null ys) && - last ys == ')' -> - defField "title" xs . - defField "section" (init ys) . - case splitBy (=='|') rest of - (ft:hds) -> - defField "footer" (trim ft) . - defField "header" - (trim $ concat hds) - [] -> id - _ -> defField "title" title' - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMan opts) - (fmap (render colwidth) . inlineListToMan opts) - $ deleteMeta "title" meta - body <- blockListToMan opts blocks - notes <- liftM stNotes get - notes' <- notesToMan opts (reverse notes) - let main = render' $ body $$ notes' $$ text "" - hasTables <- liftM stHasTables get - let context = defField "body" main - $ setFieldsFromTitle - $ defField "has-tables" hasTables - $ defField "hyphenate" True - $ defField "pandoc-version" pandocVersion - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Return man representation of notes. -notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc -notesToMan opts notes = - if null notes - then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= - return . (text ".SH NOTES" $$) . vcat - --- | Return man representation of a note. -noteToMan :: PandocMonad m => WriterOptions -> Int -> [Block] -> StateT WriterState m Doc -noteToMan opts num note = do - contents <- blockListToMan opts note - let marker = cr <> text ".SS " <> brackets (text (show num)) - return $ marker $$ contents - --- | Association list of characters to escape. -manEscapes :: [(Char, String)] -manEscapes = [ ('\160', "\\ ") - , ('\'', "\\[aq]") - , ('’', "'") - , ('\x2014', "\\[em]") - , ('\x2013', "\\[en]") - , ('\x2026', "\\&...") - ] ++ backslashEscapes "-@\\" - --- | Escape special characters for Man. -escapeString :: String -> String -escapeString = escapeStringUsing manEscapes - --- | Escape a literal (code) section for Man. -escapeCode :: String -> String -escapeCode = concat . intersperse "\n" . map escapeLine . lines where - escapeLine codeline = - case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of - a@('.':_) -> "\\&" ++ a - b -> b - --- We split inline lists into sentences, and print one sentence per --- line. groff/troff treats the line-ending period differently. --- See http://code.google.com/p/pandoc/issues/detail?id=148. - --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence [] = ([],[]) -breakSentence xs = - let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True - isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline (LineBreak) = True - isSentenceEndInline _ = False - (as, bs) = break isSentenceEndInline xs - in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) - (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - --- | Convert Pandoc block element to man. -blockToMan :: PandocMonad m - => WriterOptions -- ^ Options - -> Block -- ^ Block element - -> StateT WriterState m Doc -blockToMan _ Null = return empty -blockToMan opts (Div _ bs) = blockListToMan opts bs -blockToMan opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines -blockToMan opts (Para inlines) = do - contents <- liftM vcat $ mapM (inlineListToMan opts) $ - splitSentences inlines - return $ text ".PP" $$ contents -blockToMan opts (LineBlock lns) = - blockToMan opts $ linesToPara lns -blockToMan _ b@(RawBlock f str) - | f == Format "man" = return $ text str - | otherwise = do - report $ BlockNotRendered b - return empty -blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" -blockToMan opts (Header level _ inlines) = do - contents <- inlineListToMan opts inlines - let heading = case level of - 1 -> ".SH " - _ -> ".SS " - return $ text heading <> contents -blockToMan _ (CodeBlock _ str) = return $ - text ".IP" $$ - text ".nf" $$ - text "\\f[C]" $$ - text (escapeCode str) $$ - text "\\f[]" $$ - text ".fi" -blockToMan opts (BlockQuote blocks) = do - contents <- blockListToMan opts blocks - return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) = - let aligncode AlignLeft = "l" - aligncode AlignRight = "r" - aligncode AlignCenter = "c" - aligncode AlignDefault = "l" - in do - caption' <- inlineListToMan opts caption - modify $ \st -> st{ stHasTables = True } - let iwidths = if all (== 0) widths - then repeat "" - else map (printf "w(%0.1fn)" . (70 *)) widths - -- 78n default width - 8n indent = 70n - let coldescriptions = text $ intercalate " " - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." - colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ - text "T}" - let colheadings' = if all null headers - then empty - else makeRow colheadings $$ char '_' - body <- mapM (\row -> do - cols <- mapM (blockListToMan opts) row - return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ text ".TE" - -blockToMan opts (BulletList items) = do - contents <- mapM (bulletListItemToMan opts) items - return (vcat contents) -blockToMan opts (OrderedList attribs items) = do - let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + (maximum $ map length markers) - contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ - zip markers items - return (vcat contents) -blockToMan opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMan opts) items - return (vcat contents) - --- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: PandocMonad m => WriterOptions -> [Block] -> StateT WriterState m Doc -bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = - bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do - first' <- blockToMan opts (Plain first) - rest' <- blockListToMan opts rest - let first'' = text ".IP \\[bu] 2" $$ first' - let rest'' = if null rest - then empty - else text ".RS 2" $$ rest' $$ text ".RE" - return (first'' $$ rest'') -bulletListItemToMan opts (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" - --- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: PandocMonad m - => WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> StateT WriterState m Doc -orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = - orderedListItemToMan opts num indent ((Plain first):rest) -orderedListItemToMan opts num indent (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' - let rest'' = if null rest - then empty - else text ".RS 4" $$ rest' $$ text ".RE" - return $ first'' $$ rest'' - --- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: PandocMonad m - => WriterOptions - -> ([Inline],[[Block]]) - -> StateT WriterState m Doc -definitionListItemToMan opts (label, defs) = do - labelText <- inlineListToMan opts label - contents <- if null defs - then return empty - else liftM vcat $ forM defs $ \blocks -> do - (first, rest) <- case blocks of - ((Para x):y) -> return (Plain x,y) - (x:y) -> return (x,y) - [] -> throwError $ PandocSomeError "blocks is null" - rest' <- liftM vcat $ - mapM (\item -> blockToMan opts item) rest - first' <- blockToMan opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents - --- | Convert list of Pandoc block elements to man. -blockListToMan :: PandocMonad m - => WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> StateT WriterState m Doc -blockListToMan opts blocks = - mapM (blockToMan opts) blocks >>= (return . vcat) - --- | Convert list of Pandoc inline elements to man. -inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc -inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) - --- | Convert Pandoc inline element to man. -inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc -inlineToMan opts (Span _ ils) = inlineListToMan opts ils -inlineToMan opts (Emph lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[B]" <> contents <> text "\\f[]" -inlineToMan opts (Strikeout lst) = do - contents <- inlineListToMan opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToMan opts (Superscript lst) = do - contents <- inlineListToMan opts lst - return $ char '^' <> contents <> char '^' -inlineToMan opts (Subscript lst) = do - contents <- inlineListToMan opts lst - return $ char '~' <> contents <> char '~' -inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported -inlineToMan opts (Quoted SingleQuote lst) = do - contents <- inlineListToMan opts lst - return $ char '`' <> contents <> char '\'' -inlineToMan opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMan opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" -inlineToMan opts (Cite _ lst) = - inlineListToMan opts lst -inlineToMan _ (Code _ str) = - return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" -inlineToMan _ (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escapeString str) -inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = - lift (texMathToInlines InlineMath str) >>= inlineListToMan opts -inlineToMan opts (Math DisplayMath str) = do - contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts - return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ il@(RawInline f str) - | f == Format "man" = return $ text str - | otherwise = do - report $ InlineNotRendered il - return empty -inlineToMan _ LineBreak = return $ - cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr -inlineToMan _ SoftBreak = return space -inlineToMan _ Space = return space -inlineToMan opts (Link _ txt (src, _)) = do - linktext <- inlineListToMan opts txt - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - return $ case txt of - [Str s] - | escapeURI s == srcSuffix -> - char '<' <> text srcSuffix <> char '>' - _ -> linktext <> text " (" <> text src <> char ')' -inlineToMan opts (Image attr alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] - else alternate - linkPart <- inlineToMan opts (Link attr txt (source, tit)) - return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan _ (Note contents) = do - -- add to notes in state - modify $ \st -> st{ stNotes = contents : stNotes st } - notes <- liftM stNotes get - let ref = show $ (length notes) - return $ char '[' <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs deleted file mode 100644 index a97c32542..000000000 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ /dev/null @@ -1,1147 +0,0 @@ -{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables, MultiWayIf #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to markdown-formatted plain text. - -Markdown: <http://daringfireball.net/projects/markdown/> --} -module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) -import Data.Maybe (fromMaybe) -import Data.Monoid (Any(..)) -import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace, isPunctuation, ord, chr ) -import Data.Ord ( comparing ) -import Text.Pandoc.Pretty -import Control.Monad.Reader -import Control.Monad.State -import Control.Monad.Except (throwError) -import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Math (texMathToInlines) -import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) -import Network.URI (isURI) -import Data.Default -import Data.Yaml (Value(Object,String,Array,Bool,Number)) -import qualified Data.HashMap.Strict as H -import qualified Data.Vector as V -import qualified Data.Text as T -import qualified Data.Set as Set -import Network.HTTP ( urlEncode ) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - -type Notes = [[Block]] -type Ref = ([Inline], Target, Attr) -type Refs = [Ref] - -type MD m = ReaderT WriterEnv (StateT WriterState m) - -evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a -evalMD md env st = evalStateT (runReaderT md env) st - -data WriterEnv = WriterEnv { envInList :: Bool - , envPlain :: Bool - , envRefShortcutable :: Bool - , envBlockLevel :: Int - , envEscapeSpaces :: Bool - } - -instance Default WriterEnv - where def = WriterEnv { envInList = False - , envPlain = False - , envRefShortcutable = True - , envBlockLevel = 0 - , envEscapeSpaces = False - } - -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: Set.Set String - , stNoteNum :: Int - } - -instance Default WriterState - where def = WriterState{ stNotes = [] - , stRefs = [] - , stIds = Set.empty - , stNoteNum = 1 - } - --- | Convert Pandoc to Markdown. -writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeMarkdown opts document = - evalMD (pandocToMarkdown opts{ - writerWrapText = if isEnabled Ext_hard_line_breaks opts - then WrapNone - else writerWrapText opts } - document) def def - --- | Convert Pandoc to plain text (like markdown, but without links, --- pictures, or inline formatting). -writePlain :: PandocMonad m => WriterOptions -> Pandoc -> m String -writePlain opts document = - evalMD (pandocToMarkdown opts document) def{ envPlain = True } def - -pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc -pandocTitleBlock tit auths dat = - hang 2 (text "% ") tit <> cr <> - hang 2 (text "% ") (vcat $ map nowrap auths) <> cr <> - hang 2 (text "% ") dat <> cr - -mmdTitleBlock :: Value -> Doc -mmdTitleBlock (Object hashmap) = - vcat $ map go $ sortBy (comparing fst) $ H.toList hashmap - where go (k,v) = - case (text (T.unpack k), v) of - (k', Array vec) - | V.null vec -> empty - | otherwise -> k' <> ":" <> space <> - hcat (intersperse "; " - (map fromstr $ V.toList vec)) - (_, String "") -> empty - (k', x) -> k' <> ":" <> space <> nest 2 (fromstr x) - fromstr (String s) = text (removeBlankLines $ T.unpack s) - fromstr (Bool b) = text (show b) - fromstr (Number n) = text (show n) - fromstr _ = empty - -- blank lines not allowed in MMD metadata - we replace with . - removeBlankLines = trimr . unlines . map (\x -> - if all isSpace x then "." else x) . lines -mmdTitleBlock _ = empty - -plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc -plainTitleBlock tit auths dat = - tit <> cr <> - (hcat (intersperse (text "; ") auths)) <> cr <> - dat <> cr - -yamlMetadataBlock :: Value -> Doc -yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "---" - -jsonToYaml :: Value -> Doc -jsonToYaml (Object hashmap) = - vcat $ map (\(k,v) -> - case (text (T.unpack k), v, jsonToYaml v) of - (k', Array vec, x) - | V.null vec -> empty - | otherwise -> (k' <> ":") $$ x - (k', Object _, x) -> (k' <> ":") $$ nest 2 x - (_, String "", _) -> empty - (k', _, x) | k == "meta-json" -> empty - | otherwise -> k' <> ":" <> space <> hang 2 "" x) - $ sortBy (comparing fst) $ H.toList hashmap -jsonToYaml (Array vec) = - vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec -jsonToYaml (String "") = empty -jsonToYaml (String s) = - case T.unpack s of - x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x - | not (any isPunctuation x) -> text x - | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'" -jsonToYaml (Bool b) = text $ show b -jsonToYaml (Number n) = text $ show n -jsonToYaml _ = empty - --- | Return markdown representation of document. -pandocToMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> MD m String -pandocToMarkdown opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - isPlain <- asks envPlain - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToMarkdown opts) - (fmap (render colwidth) . inlineListToMarkdown opts) - meta - let title' = maybe empty text $ getField "title" metadata - let authors' = maybe [] (map text) $ getField "author" metadata - let date' = maybe empty text $ getField "date" metadata - let titleblock = case writerTemplate opts of - Just _ | isPlain -> - plainTitleBlock title' authors' date' - | isEnabled Ext_yaml_metadata_block opts -> - yamlMetadataBlock metadata - | isEnabled Ext_pandoc_title_block opts -> - pandocTitleBlock title' authors' date' - | isEnabled Ext_mmd_title_block opts -> - mmdTitleBlock metadata - | otherwise -> empty - Nothing -> empty - let headerBlocks = filter isHeaderBlock blocks - toc <- if writerTableOfContents opts - then tableOfContents opts headerBlocks - else return empty - -- Strip off final 'references' header if markdown citations enabled - let blocks' = if isEnabled Ext_citations opts - then case reverse blocks of - (Div (_,["references"],_) _):xs -> reverse xs - _ -> blocks - else blocks - body <- blockListToMarkdown opts blocks' - notesAndRefs' <- notesAndRefs opts - let render' :: Doc -> String - render' = render colwidth - let main = render' $ body <> notesAndRefs' - let context = defField "toc" (render' toc) - $ defField "body" main - $ (if isNullMeta meta - then id - else defField "titleblock" (render' titleblock)) - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Return markdown representation of reference key table. -refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc -refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat - --- | Return markdown representation of a reference key. -keyToMarkdown :: PandocMonad m - => WriterOptions - -> Ref - -> MD m Doc -keyToMarkdown opts (label, (src, tit), attr) = do - label' <- inlineListToMarkdown opts label - let tit' = if null tit - then empty - else space <> "\"" <> text tit <> "\"" - return $ nest 2 $ hang 2 - ("[" <> label' <> "]:" <> space) (text src <> tit') - <> linkAttributes opts attr - --- | Return markdown representation of notes. -notesToMarkdown :: PandocMonad m => WriterOptions -> [[Block]] -> MD m Doc -notesToMarkdown opts notes = do - n <- gets stNoteNum - notes' <- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [n..] notes) - modify $ \st -> st { stNoteNum = stNoteNum st + length notes } - return $ vsep notes' - --- | Return markdown representation of a note. -noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m Doc -noteToMarkdown opts num blocks = do - contents <- blockListToMarkdown opts blocks - let num' = text $ writerIdentifierPrefix opts ++ show num - let marker = if isEnabled Ext_footnotes opts - then text "[^" <> num' <> text "]:" - else text "[" <> num' <> text "]" - let markerSize = 4 + offset num' - let spacer = case writerTabStop opts - markerSize of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - return $ if isEnabled Ext_footnotes opts - then hang (writerTabStop opts) (marker <> spacer) contents - else marker <> spacer <> contents - --- | Escape special characters for Markdown. -escapeString :: WriterOptions -> String -> String -escapeString _ [] = [] -escapeString opts (c:cs) = - case c of - '<' -> "<" ++ escapeString opts cs - '>' -> ">" ++ escapeString opts cs - _ | c `elem` ['\\','`','*','_','[',']','#'] -> - '\\':c:escapeString opts cs - '^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs - '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs - '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString opts cs - _ -> '-':escapeString opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest - _ -> '.':escapeString opts cs - _ -> c : escapeString opts cs - --- | Construct table of contents from list of header blocks. -tableOfContents :: PandocMonad m => WriterOptions -> [Block] -> m Doc -tableOfContents opts headers = - let contents = BulletList $ map (elementToListItem opts) $ hierarchicalize headers - in evalMD (blockToMarkdown opts contents) def def - --- | Converts an Element to a list item for a table of contents, -elementToListItem :: WriterOptions -> Element -> [Block] -elementToListItem opts (Sec lev _nums (ident,_,_) headerText subsecs) - = Plain headerLink : - [ BulletList (map (elementToListItem opts) subsecs) | - not (null subsecs) && lev < writerTOCDepth opts ] - where headerLink = if null ident - then walk deNote headerText - else [Link nullAttr (walk deNote headerText) - ('#':ident, "")] -elementToListItem _ (Blk _) = [] - -attrsToMarkdown :: Attr -> Doc -attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] - where attribId = case attribs of - ([],_,_) -> empty - (i,_,_) -> "#" <> text i - attribClasses = case attribs of - (_,[],_) -> empty - (_,cs,_) -> hsep $ - map (text . ('.':)) - cs - attribKeys = case attribs of - (_,_,[]) -> empty - (_,_,ks) -> hsep $ - map (\(k,v) -> text k - <> "=\"" <> text v <> "\"") ks - -linkAttributes :: WriterOptions -> Attr -> Doc -linkAttributes opts attr = - if isEnabled Ext_link_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty - --- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char -olMarker = do (start, style', delim) <- anyOrderedListMarker - if delim == Period && - (style' == UpperAlpha || (style' == UpperRoman && - start `elem` [1, 5, 10, 50, 100, 500, 1000])) - then spaceChar >> spaceChar - else spaceChar - --- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of - Left _ -> False - Right _ -> True - -notesAndRefs :: PandocMonad m => WriterOptions -> MD m Doc -notesAndRefs opts = do - notes' <- reverse <$> gets stNotes >>= notesToMarkdown opts - modify $ \s -> s { stNotes = [] } - refs' <- reverse <$> gets stRefs >>= refsToMarkdown opts - modify $ \s -> s { stRefs = [] } - - let endSpacing = - if | writerReferenceLocation opts == EndOfDocument -> empty - | isEmpty notes' && isEmpty refs' -> empty - | otherwise -> blankline - - return $ - (if isEmpty notes' then empty else blankline <> notes') <> - (if isEmpty refs' then empty else blankline <> refs') <> - endSpacing - --- | Convert Pandoc block element to markdown. -blockToMarkdown :: PandocMonad m - => WriterOptions -- ^ Options - -> Block -- ^ Block element - -> MD m Doc -blockToMarkdown opts blk = - local (\env -> env {envBlockLevel = envBlockLevel env + 1}) $ - do doc <- blockToMarkdown' opts blk - blkLevel <- asks envBlockLevel - if writerReferenceLocation opts == EndOfBlock && blkLevel == 1 - then notesAndRefs opts >>= (\d -> return $ doc <> d) - else return doc - -blockToMarkdown' :: PandocMonad m - => WriterOptions -- ^ Options - -> Block -- ^ Block element - -> MD m Doc -blockToMarkdown' _ Null = return empty -blockToMarkdown' opts (Div attrs ils) = do - contents <- blockListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts && - isEnabled Ext_markdown_in_html_blocks opts - then tagWithAttrs "div" attrs <> blankline <> - contents <> blankline <> "</div>" <> blankline - else contents <> blankline -blockToMarkdown' opts (Plain inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - isPlain <- asks envPlain - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] - let contents' = if isEnabled Ext_all_symbols_escapable opts && - not isPlain && beginsWithOrderedListMarker rendered - then text $ escapeDelimiter rendered - else contents - return $ contents' <> cr --- title beginning with fig: indicates figure -blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToMarkdown opts (Para [Image attr alt (src,tit)]) -blockToMarkdown' opts (Para inlines) = - (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) -blockToMarkdown' opts (LineBlock lns) = - if isEnabled Ext_line_blocks opts - then do - mdLines <- mapM (inlineListToMarkdown opts) lns - return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline - else blockToMarkdown opts $ linesToPara lns -blockToMarkdown' opts b@(RawBlock f str) - | f == "markdown" = return $ text str <> text "\n" - | f == "html" && isEnabled Ext_raw_html opts = do - plain <- asks envPlain - return $ if plain - then empty - else if isEnabled Ext_markdown_attribute opts - then text (addMarkdownAttribute str) <> text "\n" - else text str <> text "\n" - | f `elem` ["latex", "tex"] && isEnabled Ext_raw_tex opts = do - plain <- asks envPlain - return $ if plain - then empty - else text str <> text "\n" - | otherwise = do - report $ BlockNotRendered b - return empty -blockToMarkdown' opts HorizontalRule = do - return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline -blockToMarkdown' opts (Header level attr inlines) = do - -- first, if we're putting references at the end of a section, we - -- put them here. - blkLevel <- asks envBlockLevel - refs <- if writerReferenceLocation opts == EndOfSection && blkLevel == 1 - then notesAndRefs opts - else return empty - - plain <- asks envPlain - -- we calculate the id that would be used by auto_identifiers - -- so we know whether to print an explicit identifier - ids <- gets stIds - let autoId = uniqueIdent inlines ids - modify $ \st -> st{ stIds = Set.insert autoId ids } - let attr' = case attr of - ("",[],[]) -> empty - (id',[],[]) | isEnabled Ext_auto_identifiers opts - && id' == autoId -> empty - (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> - space <> brackets (text id') - _ | isEnabled Ext_header_attributes opts -> - space <> attrsToMarkdown attr - | otherwise -> empty - contents <- inlineListToMarkdown opts $ - if level == 1 && plain - then capitalize inlines - else inlines - let setext = writerSetextHeaders opts - hdr = nowrap $ case level of - 1 | plain -> blanklines 3 <> contents <> blanklines 2 - | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '=') <> - blankline - 2 | plain -> blanklines 2 <> contents <> blankline - | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '-') <> - blankline - -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | plain || isEnabled Ext_literate_haskell opts -> - contents <> blankline - _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline - - return $ refs <> hdr -blockToMarkdown' opts (CodeBlock (_,classes,_) str) - | "haskell" `elem` classes && "literate" `elem` classes && - isEnabled Ext_literate_haskell opts = - return $ prefixed "> " (text str) <> blankline -blockToMarkdown' opts (CodeBlock attribs str) = return $ - case attribs == nullAttr of - False | isEnabled Ext_backtick_code_blocks opts -> - backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline - | isEnabled Ext_fenced_code_blocks opts -> - tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline - _ -> nest (writerTabStop opts) (text str) <> blankline - where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of - [] -> "~~~~" - xs -> case maximum $ map length xs of - n | n < 3 -> "~~~~" - | otherwise -> replicate (n+1) '~' - backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of - [] -> "```" - xs -> case maximum $ map length xs of - n | n < 3 -> "```" - | otherwise -> replicate (n+1) '`' - attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ " " <> attrsToMarkdown attribs - else case attribs of - (_,(cls:_),_) -> " " <> text cls - _ -> empty -blockToMarkdown' opts (BlockQuote blocks) = do - plain <- asks envPlain - -- if we're writing literate haskell, put a space before the bird tracks - -- so they won't be interpreted as lhs... - let leader = if isEnabled Ext_literate_haskell opts - then " > " - else if plain then " " else "> " - contents <- blockListToMarkdown opts blocks - return $ (prefixed leader contents) <> blankline -blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do - caption' <- inlineListToMarkdown opts caption - let caption'' = if null caption || not (isEnabled Ext_table_captions opts) - then empty - else blankline <> ": " <> caption' <> blankline - rawHeaders <- mapM (blockListToMarkdown opts) headers - rawRows <- mapM (mapM (blockListToMarkdown opts)) rows - let isLineBreak LineBreak = Any True - isLineBreak _ = Any False - let isSimple = all (==0) widths && - not ( getAny (query isLineBreak (headers:rows)) ) - let isPlainBlock (Plain _) = True - isPlainBlock _ = False - let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) - (nst,tbl) <- case True of - _ | isSimple && - isEnabled Ext_simple_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isSimple && - isEnabled Ext_pipe_tables opts -> fmap (id,) $ - pipeTable (all null headers) aligns rawHeaders rawRows - | not hasBlocks && - isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $ - pandocTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_grid_tables opts -> fmap (id,) $ - gridTable opts (all null headers) aligns widths - rawHeaders rawRows - | isEnabled Ext_raw_html opts -> fmap (id,) $ - text <$> - (writeHtml5String def $ Pandoc nullMeta [t]) - | otherwise -> return $ (id, text "[TABLE]") - return $ nst $ tbl $$ blankline $$ caption'' $$ blankline -blockToMarkdown' opts (BulletList items) = do - contents <- inList $ mapM (bulletListItemToMarkdown opts) items - return $ cat contents <> blankline -blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do - let start' = if isEnabled Ext_startnum opts then start else 1 - let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle - let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim - let attribs = (start', sty', delim') - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- inList $ - mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ cat contents <> blankline -blockToMarkdown' opts (DefinitionList items) = do - contents <- inList $ mapM (definitionListItemToMarkdown opts) items - return $ cat contents <> blankline - -inList :: Monad m => MD m a -> MD m a -inList p = local (\env -> env {envInList = True}) p - -addMarkdownAttribute :: String -> String -addMarkdownAttribute s = - case span isTagText $ reverse $ parseTags s of - (xs,(TagOpen t attrs:rest)) -> - renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs) - where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, - x /= "markdown"] - _ -> s - -pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc] -> [[Doc]] -> MD m Doc -pipeTable headless aligns rawHeaders rawRows = do - let sp = text " " - let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty - blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty - blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty - blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty - let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) - let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ - zipWith3 blockFor aligns widths (map chomp cs)) - <> text "|" - let toborder (a, w) = text $ case a of - AlignLeft -> ':':replicate (w + 1) '-' - AlignCenter -> ':':replicate w '-' ++ ":" - AlignRight -> replicate (w + 1) '-' ++ ":" - AlignDefault -> replicate (w + 2) '-' - -- note: pipe tables can't completely lack a - -- header; for a headerless table, we need a header of empty cells. - -- see jgm/pandoc#1996. - let header = if headless - then torow (replicate (length aligns) empty) - else torow rawHeaders - let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ - map toborder $ zip aligns widths) <> text "|" - let body = vcat $ map torow rawRows - return $ header $$ border $$ body - -pandocTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD m Doc -pandocTable opts headless aligns widths rawHeaders rawRows = do - let isSimple = all (==0) widths - let alignHeader alignment = case alignment of - AlignLeft -> lblock - AlignCenter -> cblock - AlignRight -> rblock - AlignDefault -> lblock - -- Number of characters per column necessary to output every cell - -- without requiring a line break. - -- The @+2@ is needed for specifying the alignment. - let numChars = (+ 2) . maximum . map offset - -- Number of characters per column necessary to output every cell - -- without requiring a line break *inside a word*. - -- The @+2@ is needed for specifying the alignment. - let minNumChars = (+ 2) . maximum . map minOffset - let columns = transpose (rawHeaders : rawRows) - -- minimal column width without wrapping a single word - let noWordWrapWidth - | writerWrapText opts == WrapAuto - = fromIntegral $ maximum (map minNumChars columns) - | otherwise = fromIntegral $ maximum (map numChars columns) - let relWidth w = floor $ max (fromIntegral (writerColumns opts) * w) - (noWordWrapWidth * w / minimum widths) - let widthsInChars - | isSimple = map numChars columns - | otherwise = map relWidth widths - let makeRow = hcat . intersperse (lblock 1 (text " ")) . - (zipWith3 alignHeader aligns widthsInChars) - let rows' = map makeRow rawRows - let head' = makeRow rawHeaders - let maxRowHeight = maximum $ map height (head':rows') - let underline = cat $ intersperse (text " ") $ - map (\width -> text (replicate width '-')) widthsInChars - let border = if maxRowHeight > 1 - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') - else if headless - then underline - else empty - let head'' = if headless - then empty - else border <> cr <> head' - let body = if maxRowHeight > 1 - then vsep rows' - else vcat rows' - let bottom = if headless - then underline - else border - return $ head'' $$ underline $$ body $$ bottom - -gridTable :: PandocMonad m => WriterOptions -> Bool -> [Alignment] -> [Double] - -> [Doc] -> [[Doc]] -> MD m Doc -gridTable opts headless aligns widths headers' rawRows = do - let numcols = length headers' - let widths' = if all (==0) widths - then replicate numcols (1.0 / fromIntegral numcols) - else widths - let widthsInChars = map - ((\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *)) widths' - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = chomp $ hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map (makeRow . map chomp) rawRows - let borderpart ch align widthInChars = - let widthInChars' = if widthInChars < 1 then 1 else widthInChars - in (if (align == AlignLeft || align == AlignCenter) - then char ':' - else char ch) <> - text (replicate widthInChars' ch) <> - (if (align == AlignRight || align == AlignCenter) - then char ':' - else char ch) - let border ch aligns' widthsInChars' = - char '+' <> - hcat (intersperse (char '+') (zipWith (borderpart ch) - aligns' widthsInChars')) <> char '+' - let body = vcat $ intersperse (border '-' (repeat AlignDefault) widthsInChars) - rows' - let head'' = if headless - then empty - else head' $$ border '=' aligns widthsInChars - if headless - then return $ - border '-' aligns widthsInChars $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - else return $ - border '-' (repeat AlignDefault) widthsInChars $$ - head'' $$ - body $$ - border '-' (repeat AlignDefault) widthsInChars - -itemEndsWithTightList :: [Block] -> Bool -itemEndsWithTightList bs = - case bs of - [Plain _, BulletList xs] -> isTightList xs - [Plain _, OrderedList _ xs] -> isTightList xs - _ -> False - --- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m Doc -bulletListItemToMarkdown opts bs = do - contents <- blockListToMarkdown opts bs - let sps = replicate (writerTabStop opts - 2) ' ' - let start = text ('-' : ' ' : sps) - -- remove trailing blank line if item ends with a tight list - let contents' = if itemEndsWithTightList bs - then chomp contents <> cr - else contents - return $ hang (writerTabStop opts) start $ contents' <> cr - --- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: PandocMonad m - => WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> MD m Doc -orderedListItemToMarkdown opts marker bs = do - contents <- blockListToMarkdown opts bs - let sps = case length marker - writerTabStop opts of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - let start = text marker <> sps - -- remove trailing blank line if item ends with a tight list - let contents' = if itemEndsWithTightList bs - then chomp contents <> cr - else contents - return $ hang (writerTabStop opts) start $ contents' <> cr - --- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: PandocMonad m - => WriterOptions - -> ([Inline],[[Block]]) - -> MD m Doc -definitionListItemToMarkdown opts (label, defs) = do - labelText <- inlineListToMarkdown opts label - defs' <- mapM (mapM (blockToMarkdown opts)) defs - if isEnabled Ext_definition_lists opts - then do - let tabStop = writerTabStop opts - isPlain <- asks envPlain - let leader = if isPlain then " " else ": " - let sps = case writerTabStop opts - 3 of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - if isEnabled Ext_compact_definition_lists opts - then do - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) - $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr - else do - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) - $ vcat d <> cr) defs' - let isTight = case defs of - ((Plain _ : _): _) -> True - _ -> False - return $ blankline <> nowrap labelText <> - (if isTight then cr else blankline) <> contents <> blankline - else do - return $ nowrap labelText <> text " " <> cr <> - vsep (map vsep defs') <> blankline - --- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: PandocMonad m - => WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> MD m Doc -blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat - -- insert comment between list and indented code block, or the - -- code block will be treated as a list continuation paragraph - where fixBlocks (b : CodeBlock attr x : rest) - | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = b : commentSep : CodeBlock attr x : - fixBlocks rest - fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = - b1 : commentSep : fixBlocks (b2:bs) - fixBlocks (x : xs) = x : fixBlocks xs - fixBlocks [] = [] - isListBlock (BulletList _) = True - isListBlock (OrderedList _ _) = True - isListBlock (DefinitionList _) = True - isListBlock _ = False - commentSep = if isEnabled Ext_raw_html opts - then RawBlock "html" "<!-- -->\n" - else RawBlock "markdown" " " - --- | Get reference for target; if none exists, create unique one and return. --- Prefer label if possible; otherwise, generate a unique key. -getReference :: PandocMonad m => Attr -> [Inline] -> Target -> MD m [Inline] -getReference attr label target = do - st <- get - case find (\(_,t,a) -> t == target && a == attr) (stRefs st) of - Just (ref, _, _) -> return ref - Nothing -> do - label' <- case find (\(l,_,_) -> l == label) (stRefs st) of - Just _ -> -- label is used; generate numerical label - case find (\n -> notElem [Str (show n)] - (map (\(l,_,_) -> l) (stRefs st))) - [1..(10000 :: Integer)] of - Just x -> return [Str (show x)] - Nothing -> throwError $ PandocSomeError "no unique label" - Nothing -> return label - modify (\s -> s{ stRefs = (label', target, attr) : stRefs st }) - return label' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m Doc -inlineListToMarkdown opts lst = do - inlist <- asks envInList - go (if inlist then avoidBadWrapsInList lst else lst) - where go [] = return empty - go (i:is) = case i of - (Link _ _ _) -> case is of - -- If a link is followed by another link or '[' we don't shortcut - (Link _ _ _):_ -> unshortcutable - Space:(Link _ _ _):_ -> unshortcutable - Space:(Str('[':_)):_ -> unshortcutable - Space:(RawInline _ ('[':_)):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:(Link _ _ _):_ -> unshortcutable - SoftBreak:(Str('[':_)):_ -> unshortcutable - SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str ('[':_):_ -> unshortcutable - (RawInline _ ('[':_)):_ -> unshortcutable - (RawInline _ (' ':'[':_)):_ -> unshortcutable - _ -> shortcutable - _ -> shortcutable - where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) - unshortcutable = do - iMark <- local - (\env -> env { envRefShortcutable = False }) - (inlineToMarkdown opts i) - fmap (iMark <>) (go is) - -isSp :: Inline -> Bool -isSp Space = True -isSp SoftBreak = True -isSp _ = False - -avoidBadWrapsInList :: [Inline] -> [Inline] -avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s = - Str (' ':'>':cs) : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str [c]:[]) - | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : [] -avoidBadWrapsInList (s:Str [c]:Space:xs) - | isSp s && c `elem` ['-','*','+'] = - Str [' ', c] : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str cs:Space:xs) - | isSp s && isOrderedListMarker cs = - Str (' ':cs) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str cs:[]) - | isSp s && isOrderedListMarker cs = Str (' ':cs) : [] -avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs - -isOrderedListMarker :: String -> Bool -isOrderedListMarker xs = (last xs `elem` ['.',')']) && - isRight (runParser (anyOrderedListMarker >> eof) - defaultParserState "" xs) - -isRight :: Either a b -> Bool -isRight (Right _) = True -isRight (Left _) = False - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc -inlineToMarkdown opts (Span attrs ils) = do - plain <- asks envPlain - contents <- inlineListToMarkdown opts ils - return $ case plain of - True -> contents - False | isEnabled Ext_bracketed_spans opts -> - "[" <> contents <> "]" <> - if attrs == nullAttr - then "{}" - else linkAttributes opts attrs - | isEnabled Ext_raw_html opts || - isEnabled Ext_native_spans opts -> - tagWithAttrs "span" attrs <> contents <> text "</span>" - | otherwise -> contents -inlineToMarkdown opts (Emph lst) = do - plain <- asks envPlain - contents <- inlineListToMarkdown opts lst - return $ if plain - then "_" <> contents <> "_" - else "*" <> contents <> "*" -inlineToMarkdown opts (Strong lst) = do - plain <- asks envPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" -inlineToMarkdown opts (Strikeout lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_strikeout opts - then "~~" <> contents <> "~~" - else if isEnabled Ext_raw_html opts - then "<s>" <> contents <> "</s>" - else contents -inlineToMarkdown opts (Superscript lst) = - local (\env -> env {envEscapeSpaces = True}) $ do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_superscript opts - then "^" <> contents <> "^" - else if isEnabled Ext_raw_html opts - then "<sup>" <> contents <> "</sup>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSuperscript ds) - _ -> contents - where toSuperscript '1' = '\x00B9' - toSuperscript '2' = '\x00B2' - toSuperscript '3' = '\x00B3' - toSuperscript c = chr (0x2070 + (ord c - 48)) -inlineToMarkdown opts (Subscript lst) = - local (\env -> env {envEscapeSpaces = True}) $ do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_subscript opts - then "~" <> contents <> "~" - else if isEnabled Ext_raw_html opts - then "<sub>" <> contents <> "</sub>" - else case (render Nothing contents) of - ds | all (\d -> d >= '0' && d <= '9') ds - -> text (map toSubscript ds) - _ -> contents - where toSubscript c = chr (0x2080 + (ord c - 48)) -inlineToMarkdown opts (SmallCaps lst) = do - plain <- asks envPlain - if not plain && - (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) - then do - contents <- inlineListToMarkdown opts lst - return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) - <> contents <> text "</span>" - else inlineListToMarkdown opts $ capitalize lst -inlineToMarkdown opts (Quoted SingleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "'" <> contents <> "'" - else "‘" <> contents <> "’" -inlineToMarkdown opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "\"" <> contents <> "\"" - else "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (\s -> '`' `elem` s) $ group str - let longest = if null tickGroups - then 0 - else maximum $ map length tickGroups - let marker = replicate (longest + 1) '`' - let spacer = if (longest == 0) then "" else " " - let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty - plain <- asks envPlain - if plain - then return $ text str - else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs -inlineToMarkdown opts (Str str) = do - isPlain <- asks envPlain - let str' = (if isEnabled Ext_smart opts - then unsmartify opts - else id) $ - if isPlain - then str - else escapeString opts str - return $ text str' -inlineToMarkdown opts (Math InlineMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> - inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> text str <> "$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> text str <> "\\)" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> text str <> "\\\\)" - | otherwise -> do - plain <- asks envPlain - texMathToInlines InlineMath str >>= - inlineListToMarkdown opts . - (if plain then makeMathPlainer else id) -inlineToMarkdown opts (Math DisplayMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` - inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> text str <> "$$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> text str <> "\\]" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> text str <> "\\\\]" - | otherwise -> (\x -> cr <> x <> cr) `fmap` - (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) -inlineToMarkdown opts il@(RawInline f str) = do - plain <- asks envPlain - if not plain && - ( f == "markdown" || - (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || - (isEnabled Ext_raw_html opts && f == "html") ) - then return $ text str - else do - report $ InlineNotRendered il - return empty -inlineToMarkdown opts (LineBreak) = do - plain <- asks envPlain - if plain || isEnabled Ext_hard_line_breaks opts - then return cr - else return $ - if isEnabled Ext_escaped_line_breaks opts - then "\\" <> cr - else " " <> cr -inlineToMarkdown _ Space = do - escapeSpaces <- asks envEscapeSpaces - return $ if escapeSpaces then "\\ " else space -inlineToMarkdown opts SoftBreak = do - escapeSpaces <- asks envEscapeSpaces - let space' = if escapeSpaces then "\\ " else space - return $ case writerWrapText opts of - WrapNone -> space' - WrapAuto -> space' - WrapPreserve -> cr -inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst -inlineToMarkdown opts (Cite (c:cs) lst) - | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | otherwise = - if citationMode c == AuthorInText - then do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br - else do - cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" - where - joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) - convertOne Citation { citationId = k - , citationPrefix = pinlines - , citationSuffix = sinlines - , citationMode = m } - = do - pdoc <- inlineListToMarkdown opts pinlines - sdoc <- inlineListToMarkdown opts sinlines - let k' = text (modekey m ++ "@" ++ k) - r = case sinlines of - Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc - _ -> k' <+> sdoc - return $ pdoc <+> r - modekey SuppressAuthor = "-" - modekey _ = "" -inlineToMarkdown opts lnk@(Link attr txt (src, tit)) - | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && - attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [lnk]]) - | otherwise = do - plain <- asks envPlain - linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit - then empty - else text $ " \"" ++ tit ++ "\"" - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - let useAuto = isURI src && - case txt of - [Str s] | escapeURI s == srcSuffix -> True - _ -> False - let useRefLinks = writerReferenceLinks opts && not useAuto - shortcutable <- asks envRefShortcutable - let useShortcutRefLinks = shortcutable && - isEnabled Ext_shortcut_reference_links opts - ref <- if useRefLinks then getReference attr txt (src, tit) else return [] - reftext <- inlineListToMarkdown opts ref - return $ if useAuto - then if plain - then text srcSuffix - else "<" <> text srcSuffix <> ">" - else if useRefLinks - then let first = "[" <> linktext <> "]" - second = if txt == ref - then if useShortcutRefLinks - then "" - else "[]" - else "[" <> reftext <> "]" - in first <> second - else if plain - then linktext - else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" <> - linkAttributes opts attr -inlineToMarkdown opts img@(Image attr alternate (source, tit)) - | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && - attr /= nullAttr = -- use raw HTML - (text . trim) <$> writeHtml5String def (Pandoc nullMeta [Plain [img]]) - | otherwise = do - plain <- asks envPlain - let txt = if null alternate || alternate == [Str source] - -- to prevent autolinks - then [Str ""] - else alternate - linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) - return $ if plain - then "[" <> linkPart <> "]" - else "!" <> linkPart -inlineToMarkdown opts (Note contents) = do - modify (\st -> st{ stNotes = contents : stNotes st }) - st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (stNoteNum st + (length $ stNotes st) - 1) - if isEnabled Ext_footnotes opts - then return $ "[^" <> ref <> "]" - else return $ "[" <> ref <> "]" - -makeMathPlainer :: [Inline] -> [Inline] -makeMathPlainer = walk go - where - go (Emph xs) = Span nullAttr xs - go x = x - diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs deleted file mode 100644 index b7419ddf9..000000000 --- a/src/Text/Pandoc/Writers/Math.hs +++ /dev/null @@ -1,49 +0,0 @@ -module Text.Pandoc.Writers.Math - ( texMathToInlines - , convertMath - ) -where - -import Text.Pandoc.Class -import Text.Pandoc.Definition -import Text.Pandoc.Logging -import Text.TeXMath (Exp, writePandoc, DisplayType(..), readTeX) - --- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. --- Defaults to raw formula between @$@ or @$$@ characters if entire formula --- can't be converted. -texMathToInlines :: PandocMonad m - => MathType - -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> m [Inline] -texMathToInlines mt inp = do - res <- convertMath writePandoc mt inp - case res of - Right (Just ils) -> return ils - Right (Nothing) -> do - report $ CouldNotConvertTeXMath inp "" - return [mkFallback mt inp] - Left il -> return [il] - -mkFallback :: MathType -> String -> Inline -mkFallback mt str = Str (delim ++ str ++ delim) - where delim = case mt of - DisplayMath -> "$$" - InlineMath -> "$" - --- | Converts a raw TeX math formula using a writer function, --- issuing a warning and producing a fallback (a raw string) --- on failure. -convertMath :: PandocMonad m - => (DisplayType -> [Exp] -> a) -> MathType -> String - -> m (Either Inline a) -convertMath writer mt str = do - case writer dt <$> readTeX str of - Right r -> return (Right r) - Left e -> do - report $ CouldNotConvertTeXMath str e - return (Left $ mkFallback mt str) - where dt = case mt of - DisplayMath -> DisplayBlock - InlineMath -> DisplayInline - diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs deleted file mode 100644 index dc6206e6c..000000000 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ /dev/null @@ -1,442 +0,0 @@ -{- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to MediaWiki markup. - -MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> --} -module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) -import Network.URI ( isURI ) -import Control.Monad.Reader -import Control.Monad.State -import Text.Pandoc.Class (PandocMonad) - -data WriterState = WriterState { - stNotes :: Bool -- True if there are notes - , stOptions :: WriterOptions -- writer options - } - -data WriterReader = WriterReader { - options :: WriterOptions -- Writer options - , listLevel :: String -- String at beginning of list items, e.g. "**" - , useTags :: Bool -- True if we should use HTML tags because we're in a complex list - } - -type MediaWikiWriter = ReaderT WriterReader (State WriterState) - --- | Convert Pandoc to MediaWiki. -writeMediaWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeMediaWiki opts document = return $ - let initialState = WriterState { stNotes = False, stOptions = opts } - env = WriterReader { options = opts, listLevel = [], useTags = False } - in evalState (runReaderT (pandocToMediaWiki document) env) initialState - --- | Return MediaWiki representation of document. -pandocToMediaWiki :: Pandoc -> MediaWikiWriter String -pandocToMediaWiki (Pandoc meta blocks) = do - opts <- asks options - metadata <- metaToJSON opts - (fmap trimr . blockListToMediaWiki) - inlineListToMediaWiki - meta - body <- blockListToMediaWiki blocks - notesExist <- gets stNotes - let notes = if notesExist - then "\n<references />" - else "" - let main = body ++ notes - let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - --- | Escape special characters for MediaWiki. -escapeString :: String -> String -escapeString = escapeStringForXML - --- | Convert Pandoc block element to MediaWiki. -blockToMediaWiki :: Block -- ^ Block element - -> MediaWikiWriter String - -blockToMediaWiki Null = return "" - -blockToMediaWiki (Div attrs bs) = do - contents <- blockListToMediaWiki bs - return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ - contents ++ "\n\n" ++ "</div>" - -blockToMediaWiki (Plain inlines) = - inlineListToMediaWiki inlines - --- title beginning with fig: indicates that the image is a figure -blockToMediaWiki (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return "" - else ("|caption " ++) `fmap` inlineListToMediaWiki txt - img <- imageToMediaWiki attr - let opt = if null txt - then "" - else "|alt=" ++ if null tit then capt else tit ++ capt - return $ "[[File:" ++ src ++ "|frame|none" ++ img ++ opt ++ "]]\n" - -blockToMediaWiki (Para inlines) = do - tags <- asks useTags - lev <- asks listLevel - contents <- inlineListToMediaWiki inlines - return $ if tags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null lev then "\n" else "" - -blockToMediaWiki (LineBlock lns) = - blockToMediaWiki $ linesToPara lns - -blockToMediaWiki (RawBlock f str) - | f == Format "mediawiki" = return str - | f == Format "html" = return str - | otherwise = return "" - -blockToMediaWiki HorizontalRule = return "\n-----\n" - -blockToMediaWiki (Header level _ inlines) = do - contents <- inlineListToMediaWiki inlines - let eqs = replicate level '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" - -blockToMediaWiki (CodeBlock (_,classes,_) str) = do - let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", - "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", - "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", - "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", - "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", - "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", - "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", - "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - return $ - if null at - then "<pre" ++ (if null classes - then ">" - else " class=\"" ++ unwords classes ++ "\">") ++ - escapeString str ++ "</pre>" - else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>" - -- note: no escape! - -blockToMediaWiki (BlockQuote blocks) = do - contents <- blockListToMediaWiki blocks - return $ "<blockquote>" ++ contents ++ "</blockquote>" - -blockToMediaWiki (Table capt aligns widths headers rows') = do - caption <- if null capt - then return "" - else do - c <- inlineListToMediaWiki capt - return $ "|+ " ++ trimr c ++ "\n" - let headless = all null headers - let allrows = if headless then rows' else headers:rows' - tableBody <- intercalate "|-\n" `fmap` - mapM (tableRowToMediaWiki headless aligns widths) - (zip [1..] allrows) - return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" - -blockToMediaWiki x@(BulletList items) = do - tags <- fmap (|| not (isSimpleList x)) $ asks useTags - if tags - then do - contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ul>\n" ++ vcat contents ++ "</ul>\n" - else do - lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" - -blockToMediaWiki x@(OrderedList attribs items) = do - tags <- fmap (|| not (isSimpleList x)) $ asks useTags - if tags - then do - contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n" - else do - lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" - -blockToMediaWiki x@(DefinitionList items) = do - tags <- fmap (|| not (isSimpleList x)) $ asks useTags - if tags - then do - contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items - return $ "<dl>\n" ++ vcat contents ++ "</dl>\n" - else do - lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" - --- Auxiliary functions for lists: - --- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle - in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ - (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" - else "") - --- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: [Block] -> MediaWikiWriter String -listItemToMediaWiki items = do - contents <- blockListToMediaWiki items - tags <- asks useTags - if tags - then return $ "<li>" ++ contents ++ "</li>" - else do - marker <- asks listLevel - return $ marker ++ " " ++ contents - --- | Convert definition list item (label, list of blocks) to MediaWiki. -definitionListItemToMediaWiki :: ([Inline],[[Block]]) - -> MediaWikiWriter String -definitionListItemToMediaWiki (label, items) = do - labelText <- inlineListToMediaWiki label - contents <- mapM blockListToMediaWiki items - tags <- asks useTags - if tags - then return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) - else do - marker <- asks listLevel - return $ marker ++ " " ++ labelText ++ "\n" ++ - intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents) - --- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. -isSimpleList :: Block -> Bool -isSimpleList x = - case x of - BulletList items -> all isSimpleListItem items - OrderedList (num, sty, _) items -> all isSimpleListItem items && - num == 1 && sty `elem` [DefaultStyle, Decimal] - DefinitionList items -> all isSimpleListItem $ concatMap snd items - _ -> False - --- | True if list item can be handled with the simple wiki syntax. False if --- HTML tags will be needed. -isSimpleListItem :: [Block] -> Bool -isSimpleListItem [] = True -isSimpleListItem [x] = - case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False -isSimpleListItem [x, y] | isPlainOrPara x = - case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False -isSimpleListItem _ = False - -isPlainOrPara :: Block -> Bool -isPlainOrPara (Plain _) = True -isPlainOrPara (Para _) = True -isPlainOrPara _ = False - --- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" - --- Auxiliary functions for tables: - -tableRowToMediaWiki :: Bool - -> [Alignment] - -> [Double] - -> (Int, [[Block]]) - -> MediaWikiWriter String -tableRowToMediaWiki headless alignments widths (rownum, cells) = do - cells' <- mapM (tableCellToMediaWiki headless rownum) - $ zip3 alignments widths cells - return $ unlines cells' - -tableCellToMediaWiki :: Bool - -> Int - -> (Alignment, Double, [Block]) - -> MediaWikiWriter String -tableCellToMediaWiki headless rownum (alignment, width, bs) = do - contents <- blockListToMediaWiki bs - let marker = if rownum == 1 && not headless then "!" else "|" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let attrs = ["align=" ++ show (alignmentToString alignment) | - alignment /= AlignDefault && alignment /= AlignLeft] ++ - ["width=\"" ++ percent width ++ "\"" | - width /= 0.0 && rownum == 1] - let attr = if null attrs - then "" - else unwords attrs ++ "|" - let sep = case bs of - [Plain _] -> " " - [Para _] -> " " - _ -> "\n" - return $ marker ++ attr ++ sep ++ trimr contents - -alignmentToString :: Alignment -> String -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -imageToMediaWiki :: Attr -> MediaWikiWriter String -imageToMediaWiki attr = do - opts <- gets stOptions - let (_, cls, _) = attr - toPx = fmap (showInPixel opts) . checkPct - checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim - go (Just w) Nothing = '|':w ++ "px" - go (Just w) (Just h) = '|':w ++ "x" ++ h ++ "px" - go Nothing (Just h) = "|x" ++ h ++ "px" - go Nothing Nothing = "" - dims = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) - classes = if null cls - then "" - else "|class=" ++ unwords cls - return $ dims ++ classes - --- | Convert list of Pandoc block elements to MediaWiki. -blockListToMediaWiki :: [Block] -- ^ List of block elements - -> MediaWikiWriter String -blockListToMediaWiki blocks = - fmap vcat $ mapM blockToMediaWiki blocks - --- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String -inlineListToMediaWiki lst = - fmap concat $ mapM inlineToMediaWiki lst - --- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: Inline -> MediaWikiWriter String - -inlineToMediaWiki (Span attrs ils) = do - contents <- inlineListToMediaWiki ils - return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>" - -inlineToMediaWiki (Emph lst) = do - contents <- inlineListToMediaWiki lst - return $ "''" ++ contents ++ "''" - -inlineToMediaWiki (Strong lst) = do - contents <- inlineListToMediaWiki lst - return $ "'''" ++ contents ++ "'''" - -inlineToMediaWiki (Strikeout lst) = do - contents <- inlineListToMediaWiki lst - return $ "<s>" ++ contents ++ "</s>" - -inlineToMediaWiki (Superscript lst) = do - contents <- inlineListToMediaWiki lst - return $ "<sup>" ++ contents ++ "</sup>" - -inlineToMediaWiki (Subscript lst) = do - contents <- inlineListToMediaWiki lst - return $ "<sub>" ++ contents ++ "</sub>" - -inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst - -inlineToMediaWiki (Quoted SingleQuote lst) = do - contents <- inlineListToMediaWiki lst - return $ "\8216" ++ contents ++ "\8217" - -inlineToMediaWiki (Quoted DoubleQuote lst) = do - contents <- inlineListToMediaWiki lst - return $ "\8220" ++ contents ++ "\8221" - -inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst - -inlineToMediaWiki (Code _ str) = - return $ "<code>" ++ escapeString str ++ "</code>" - -inlineToMediaWiki (Str str) = return $ escapeString str - -inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>" - -- note: str should NOT be escaped - -inlineToMediaWiki (RawInline f str) - | f == Format "mediawiki" = return str - | f == Format "html" = return str - | otherwise = return "" - -inlineToMediaWiki LineBreak = return "<br />\n" - -inlineToMediaWiki SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - case wrapText of - WrapAuto -> return " " - WrapNone -> return " " - WrapPreserve -> return "\n" - -inlineToMediaWiki Space = return " " - -inlineToMediaWiki (Link _ txt (src, _)) = do - label <- inlineListToMediaWiki txt - case txt of - [Str s] | isURI src && escapeURI s == src -> return src - _ -> return $ if isURI src - then "[" ++ src ++ " " ++ label ++ "]" - else "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page - -inlineToMediaWiki (Image attr alt (source, tit)) = do - img <- imageToMediaWiki attr - alt' <- inlineListToMediaWiki alt - let txt = if null tit - then if null alt - then "" - else '|' : alt' - else '|' : tit - return $ "[[File:" ++ source ++ img ++ txt ++ "]]" - -inlineToMediaWiki (Note contents) = do - contents' <- blockListToMediaWiki contents - modify (\s -> s { stNotes = True }) - return $ "<ref>" ++ contents' ++ "</ref>" - -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs deleted file mode 100644 index 2421fd94d..000000000 --- a/src/Text/Pandoc/Writers/Native.hs +++ /dev/null @@ -1,79 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of a 'Pandoc' document to a string representation. --} -module Text.Pandoc.Writers.Native ( writeNative ) -where -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Data.List ( intersperse ) -import Text.Pandoc.Definition -import Text.Pandoc.Pretty -import Text.Pandoc.Class (PandocMonad) - -prettyList :: [Doc] -> Doc -prettyList ds = - "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> Doc -prettyBlock (LineBlock lines') = - "LineBlock" $$ prettyList (map (text . show) lines') -prettyBlock (BlockQuote blocks) = - "BlockQuote" $$ prettyList (map prettyBlock blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList" <> space <> text (show attribs) $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) -prettyBlock (BulletList blockLists) = - "BulletList" $$ - (prettyList $ map (prettyList . map prettyBlock) blockLists) -prettyBlock (DefinitionList items) = "DefinitionList" $$ - (prettyList $ map deflistitem items) - where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> - nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" -prettyBlock (Table caption aligns widths header rows) = - "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <> - text (show widths) $$ - prettyRow header $$ - prettyList (map prettyRow rows) - where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols) -prettyBlock (Div attr blocks) = - text ("Div " <> show attr) $$ prettyList (map prettyBlock blocks) -prettyBlock block = text $ show block - --- | Prettyprint Pandoc document. -writeNative :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeNative opts (Pandoc meta blocks) = return $ - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - withHead = case writerTemplate opts of - Just _ -> \bs -> text ("Pandoc (" ++ show meta ++ ")") $$ - bs $$ cr - Nothing -> id - in render colwidth $ withHead $ prettyList $ map prettyBlock blocks diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs deleted file mode 100644 index ee5fa4c24..000000000 --- a/src/Text/Pandoc/Writers/ODT.hs +++ /dev/null @@ -1,210 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -{- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to ODT. --} -module Text.Pandoc.Writers.ODT ( writeODT ) where -import Data.List ( isPrefixOf ) -import Data.Maybe ( fromMaybe ) -import Text.XML.Light.Output -import Text.TeXMath -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.UTF8 ( fromStringLazy ) -import Codec.Archive.Zip -import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) -import Text.Pandoc.Shared ( stringify ) -import Text.Pandoc.ImageSize -import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) -import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Text.Pandoc.Writers.Shared ( fixDisplayMath ) -import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) -import Control.Monad.State -import Control.Monad.Except (runExceptT) -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.XML -import Text.Pandoc.Pretty -import System.FilePath ( takeExtension, takeDirectory, (<.>)) -import Text.Pandoc.Class ( PandocMonad, report ) -import qualified Text.Pandoc.Class as P -import Text.Pandoc.Logging - -data ODTState = ODTState { stEntries :: [Entry] - } - -type O m = StateT ODTState m - --- | Produce an ODT file from a Pandoc document. -writeODT :: PandocMonad m - => WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> m B.ByteString -writeODT opts doc = - let initState = ODTState{ stEntries = [] - } - in - evalStateT (pandocToODT opts doc) initState - --- | Produce an ODT file from a Pandoc document. -pandocToODT :: PandocMonad m - => WriterOptions -- ^ Writer options - -> Pandoc -- ^ Document to convert - -> O m B.ByteString -pandocToODT opts doc@(Pandoc meta _) = do - let datadir = writerUserDataDir opts - let title = docTitle meta - refArchive <- - case writerReferenceDoc opts of - Just f -> liftM toArchive $ lift $ P.readFileLazy f - Nothing -> lift $ (toArchive . B.fromStrict) <$> - P.readDataFile datadir "reference.odt" - -- handle formulas and pictures - -- picEntriesRef <- P.newIORef ([] :: [Entry]) - doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc - newContents <- lift $ writeOpenDocument opts{writerWrapText = WrapNone} doc' - epochtime <- floor `fmap` (lift P.getPOSIXTime) - let contentEntry = toEntry "content.xml" epochtime - $ fromStringLazy newContents - picEntries <- gets stEntries - let archive = foldr addEntryToArchive refArchive - $ contentEntry : picEntries - -- construct META-INF/manifest.xml based on archive - let toFileEntry fp = case getMimeType fp of - Nothing -> empty - Just m -> selfClosingTag "manifest:file-entry" - [("manifest:media-type", m) - ,("manifest:full-path", fp) - ,("manifest:version", "1.2") - ] - let files = [ ent | ent <- filesInArchive archive, - not ("META-INF" `isPrefixOf` ent) ] - let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive, - "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ] - let manifestEntry = toEntry "META-INF/manifest.xml" epochtime - $ fromStringLazy $ render Nothing - $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" - $$ - ( inTags True "manifest:manifest" - [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") - ,("manifest:version","1.2")] - $ ( selfClosingTag "manifest:file-entry" - [("manifest:media-type","application/vnd.oasis.opendocument.text") - ,("manifest:full-path","/")] - $$ vcat ( map toFileEntry $ files ) - $$ vcat ( map toFileEntry $ formulas ) - ) - ) - let archive' = addEntryToArchive manifestEntry archive - let metaEntry = toEntry "meta.xml" epochtime - $ fromStringLazy $ render Nothing - $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" - $$ - ( inTags True "office:document-meta" - [("xmlns:office","urn:oasis:names:tc:opendocument:xmlns:office:1.0") - ,("xmlns:xlink","http://www.w3.org/1999/xlink") - ,("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:meta","urn:oasis:names:tc:opendocument:xmlns:meta:1.0") - ,("xmlns:ooo","http://openoffice.org/2004/office") - ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") - ,("office:version","1.2")] - $ ( inTagsSimple "office:meta" - $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title)) - ) - ) - ) - -- make sure mimetype is first - let mimetypeEntry = toEntry "mimetype" epochtime - $ fromStringLazy "application/vnd.oasis.opendocument.text" - let archive'' = addEntryToArchive mimetypeEntry - $ addEntryToArchive metaEntry archive' - return $ fromArchive archive'' - --- | transform both Image and Math elements -transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline -transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = do - res <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src - case res of - Left (_ :: PandocError) -> do - report $ CouldNotFetchResource src "" - return $ Emph lab - Right (img, mbMimeType) -> do - (ptX, ptY) <- case imageSize img of - Right s -> return $ sizeInPoints s - Left msg -> do - report $ CouldNotDetermineImageSize src msg - return (100, 100) - let dims = - case (getDim Width, getDim Height) of - (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent _), Nothing) -> [("width", show w), ("style:rel-height", "scale")] - (Nothing, Just h@(Percent _)) -> [("style:rel-width", "scale"), ("height", show h)] - (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] - (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] - _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] - where - ratio = ptX / ptY - getDim dir = case (dimension dir attr) of - Just (Percent i) -> Just $ Percent i - Just dim -> Just $ Inch $ inInch opts dim - Nothing -> Nothing - let newattr = (id', cls, dims) - entries <- gets stEntries - let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) - (mbMimeType >>= extensionFromMimeType) - let newsrc = "Pictures/" ++ show (length entries) <.> extension - let toLazy = B.fromChunks . (:[]) - epochtime <- floor `fmap` (lift P.getPOSIXTime) - let entry = toEntry newsrc epochtime $ toLazy img - modify $ \st -> st{ stEntries = entry : entries } - return $ Image newattr lab (newsrc, t) -transformPicMath _ (Math t math) = do - entries <- gets stEntries - let dt = if t == InlineMath then DisplayInline else DisplayBlock - case writeMathML dt <$> readTeX math of - Left _ -> return $ Math t math - Right r -> do - let conf = useShortEmptyTags (const False) defaultConfigPP - let mathml = ppcTopElement conf r - epochtime <- floor `fmap` (lift $ P.getPOSIXTime) - let dirname = "Formula-" ++ show (length entries) ++ "/" - let fname = dirname ++ "content.xml" - let entry = toEntry fname epochtime (fromStringLazy mathml) - modify $ \st -> st{ stEntries = entry : entries } - return $ RawInline (Format "opendocument") $ render Nothing $ - inTags False "draw:frame" [("text:anchor-type", - if t == DisplayMath - then "paragraph" - else "as-char") - ,("style:vertical-pos", "middle") - ,("style:vertical-rel", "text")] $ - selfClosingTag "draw:object" [("xlink:href", dirname) - , ("xlink:type", "simple") - , ("xlink:show", "embed") - , ("xlink:actuate", "onLoad")] - -transformPicMath _ x = return x diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs deleted file mode 100644 index bc0cfc300..000000000 --- a/src/Text/Pandoc/Writers/OPML.hs +++ /dev/null @@ -1,103 +0,0 @@ -{-# LANGUAGE CPP #-} -{- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to OPML XML. --} -module Text.Pandoc.Writers.OPML ( writeOPML) where -import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Markdown (writeMarkdown) -import Text.Pandoc.Pretty -import Text.Pandoc.Compat.Time -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Error -import Control.Monad.Except (throwError) -import Text.Pandoc.Class (PandocMonad) - --- | Convert Pandoc document to string in OPML format. -writeOPML :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeOPML opts (Pandoc meta blocks) = do - let elements = hierarchicalize blocks - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta - metadata <- metaToJSON opts - (writeMarkdown def . Pandoc nullMeta) - (\ils -> trimr <$> (writeMarkdown def $ Pandoc nullMeta [Plain ils])) - meta' - main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements) - let context = defField "body" main metadata - return $ case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - - -writeHtmlInlines :: PandocMonad m => [Inline] -> m String -writeHtmlInlines ils = - trim <$> (writeHtml5String def $ Pandoc nullMeta [Plain ils]) - --- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT -showDateTimeRFC822 :: UTCTime -> String -showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" - -convertDate :: [Inline] -> String -convertDate ils = maybe "" showDateTimeRFC822 $ -#if MIN_VERSION_time(1,5,0) - parseTimeM True -#else - parseTime -#endif - defaultTimeLocale "%F" =<< (normalizeDate $ stringify ils) - --- | Convert an Element to OPML. -elementToOPML :: PandocMonad m => WriterOptions -> Element -> m Doc -elementToOPML _ (Blk _) = return empty -elementToOPML opts (Sec _ _num _ title elements) = do - let isBlk :: Element -> Bool - isBlk (Blk _) = True - isBlk _ = False - - fromBlk :: PandocMonad m => Element -> m Block - fromBlk (Blk x) = return x - fromBlk _ = throwError $ PandocSomeError "fromBlk called on non-block" - - (blocks, rest) = span isBlk elements - htmlIls <- writeHtmlInlines title - md <- if null blocks - then return [] - else do blks <- mapM fromBlk blocks - writeMarkdown def $ Pandoc nullMeta blks - let attrs = [("text", htmlIls)] ++ [("_note", md) | not (null blocks)] - o <- mapM (elementToOPML opts) rest - return $ inTags True "outline" attrs $ vcat o diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs deleted file mode 100644 index 851e18b8e..000000000 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ /dev/null @@ -1,626 +0,0 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} -{- -Copyright (C) 2008-2015 Andrea Rossato <andrea.rossato@ing.unitn.it> - and John MacFarlane. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2015 Andrea Rossato and John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to OpenDocument XML. --} -module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.XML -import Text.Pandoc.Shared (linesToPara) -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Writers.Math -import Text.Pandoc.Pretty -import Text.Printf ( printf ) -import Control.Arrow ( (***), (>>>) ) -import Control.Monad.State hiding ( when ) -import Data.Char (chr) -import qualified Data.Set as Set -import qualified Data.Map as Map -import Text.Pandoc.Writers.Shared -import Data.List (sortBy) -import Data.Ord (comparing) -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - --- | Auxiliary function to convert Plain block to Para. -plainToPara :: Block -> Block -plainToPara (Plain x) = Para x -plainToPara x = x - --- --- OpenDocument writer --- - -type OD m = StateT WriterState m - -data WriterState = - WriterState { stNotes :: [Doc] - , stTableStyles :: [Doc] - , stParaStyles :: [Doc] - , stListStyles :: [(Int, [Doc])] - , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc) - , stTextStyleAttr :: Set.Set TextStyle - , stIndentPara :: Int - , stInDefinition :: Bool - , stTight :: Bool - , stFirstPara :: Bool - , stImageId :: Int - } - -defaultWriterState :: WriterState -defaultWriterState = - WriterState { stNotes = [] - , stTableStyles = [] - , stParaStyles = [] - , stListStyles = [] - , stTextStyles = Map.empty - , stTextStyleAttr = Set.empty - , stIndentPara = 0 - , stInDefinition = False - , stTight = False - , stFirstPara = False - , stImageId = 1 - } - -when :: Bool -> Doc -> Doc -when p a = if p then a else empty - -addTableStyle :: PandocMonad m => Doc -> OD m () -addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } - -addNote :: PandocMonad m => Doc -> OD m () -addNote i = modify $ \s -> s { stNotes = i : stNotes s } - -addParaStyle :: PandocMonad m => Doc -> OD m () -addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } - -addTextStyle :: PandocMonad m => Set.Set TextStyle -> (String, Doc) -> OD m () -addTextStyle attrs i = modify $ \s -> - s { stTextStyles = Map.insert attrs i (stTextStyles s) } - -addTextStyleAttr :: PandocMonad m => TextStyle -> OD m () -addTextStyleAttr t = modify $ \s -> - s { stTextStyleAttr = Set.insert t (stTextStyleAttr s) } - -increaseIndent :: PandocMonad m => OD m () -increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } - -resetIndent :: PandocMonad m => OD m () -resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } - -inTightList :: PandocMonad m => OD m a -> OD m a -inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> - modify (\s -> s { stTight = False }) >> return r - -setInDefinitionList :: PandocMonad m => Bool -> OD m () -setInDefinitionList b = modify $ \s -> s { stInDefinition = b } - -setFirstPara :: PandocMonad m => OD m () -setFirstPara = modify $ \s -> s { stFirstPara = True } - -inParagraphTags :: PandocMonad m => Doc -> OD m Doc -inParagraphTags d | isEmpty d = return empty -inParagraphTags d = do - b <- gets stFirstPara - a <- if b - then do modify $ \st -> st { stFirstPara = False } - return $ [("text:style-name", "First_20_paragraph")] - else return [("text:style-name", "Text_20_body")] - return $ inTags False "text:p" a d - -inParagraphTagsWithStyle :: String -> Doc -> Doc -inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] - -inSpanTags :: String -> Doc -> Doc -inSpanTags s = inTags False "text:span" [("text:style-name",s)] - -withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a -withTextStyle s f = do - oldTextStyleAttr <- gets stTextStyleAttr - addTextStyleAttr s - res <- f - modify $ \st -> st{ stTextStyleAttr = oldTextStyleAttr } - return res - -inTextStyle :: PandocMonad m => Doc -> OD m Doc -inTextStyle d = do - at <- gets stTextStyleAttr - if Set.null at - then return d - else do - styles <- gets stTextStyles - case Map.lookup at styles of - Just (styleName, _) -> return $ - inTags False "text:span" [("text:style-name",styleName)] d - Nothing -> do - let styleName = "T" ++ show (Map.size styles + 1) - addTextStyle at (styleName, - inTags False "style:style" - [("style:name", styleName) - ,("style:family", "text")] - $ selfClosingTag "style:text-properties" - (concatMap textStyleAttr (Set.toList at))) - return $ inTags False - "text:span" [("text:style-name",styleName)] d - -inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc -inHeaderTags i d = - return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] d - -inQuotes :: QuoteType -> Doc -> Doc -inQuotes SingleQuote s = char '\8216' <> s <> char '\8217' -inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221' - -handleSpaces :: String -> Doc -handleSpaces s - | ( ' ':_) <- s = genTag s - | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x - | otherwise = rm s - where - genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>) - tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)] - rm ( ' ':xs) = char ' ' <> genTag xs - rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs - rm ( x:xs) = char x <> rm xs - rm [] = empty - --- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeOpenDocument opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - let render' = render colwidth - ((body, metadata),s) <- flip runStateT - defaultWriterState $ do - m <- metaToJSON opts - (fmap (render colwidth) . blocksToOpenDocument opts) - (fmap (render colwidth) . inlinesToOpenDocument opts) - meta - b <- render' `fmap` blocksToOpenDocument opts blocks - return (b, m) - let styles = stTableStyles s ++ stParaStyles s ++ - map snd (reverse $ sortBy (comparing fst) $ - Map.elems (stTextStyles s)) - listStyle (n,l) = inTags True "text:list-style" - [("style:name", "L" ++ show n)] (vcat l) - let listStyles = map listStyle (stListStyles s) - let automaticStyles = vcat $ reverse $ styles ++ listStyles - let context = defField "body" body - $ defField "automatic-styles" (render' automaticStyles) - $ metadata - return $ case writerTemplate opts of - Nothing -> body - Just tpl -> renderTemplate' tpl context - -withParagraphStyle :: PandocMonad m - => WriterOptions -> String -> [Block] -> OD m Doc -withParagraphStyle o s (b:bs) - | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where go i = (<>) i <$> withParagraphStyle o s bs -withParagraphStyle _ _ [] = return empty - -inPreformattedTags :: PandocMonad m => String -> OD m Doc -inPreformattedTags s = do - n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] - return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s - -orderedListToOpenDocument :: PandocMonad m - => WriterOptions -> Int -> [[Block]] -> OD m Doc -orderedListToOpenDocument o pn bs = - vcat . map (inTagsIndented "text:list-item") <$> - mapM (orderedItemToOpenDocument o pn . map plainToPara) bs - -orderedItemToOpenDocument :: PandocMonad m - => WriterOptions -> Int -> [Block] -> OD m Doc -orderedItemToOpenDocument o n (b:bs) - | OrderedList a l <- b = newLevel a l - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where - go i = ($$) i <$> orderedItemToOpenDocument o n bs - newLevel a l = do - nn <- length <$> gets stParaStyles - ls <- head <$> gets stListStyles - modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) } - inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l -orderedItemToOpenDocument _ _ [] = return empty - -isTightList :: [[Block]] -> Bool -isTightList [] = False -isTightList (b:_) - | Plain {} : _ <- b = True - | otherwise = False - -newOrderedListStyle :: PandocMonad m - => Bool -> ListAttributes -> OD m (Int,Int) -newOrderedListStyle b a = do - ln <- (+) 1 . length <$> gets stListStyles - let nbs = orderedListLevelStyle a (ln, []) - pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln - modify $ \s -> s { stListStyles = nbs : stListStyles s } - return (ln,pn) - -bulletListToOpenDocument :: PandocMonad m - => WriterOptions -> [[Block]] -> OD m Doc -bulletListToOpenDocument o b = do - ln <- (+) 1 . length <$> gets stListStyles - (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln - modify $ \s -> s { stListStyles = ns : stListStyles s } - is <- listItemsToOpenDocument ("P" ++ show pn) o b - return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is - -listItemsToOpenDocument :: PandocMonad m - => String -> WriterOptions -> [[Block]] -> OD m Doc -listItemsToOpenDocument s o is = - vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is - -deflistItemToOpenDocument :: PandocMonad m - => WriterOptions -> ([Inline],[[Block]]) -> OD m Doc -deflistItemToOpenDocument o (t,d) = do - let ts = if isTightList d - then "Definition_20_Term_20_Tight" else "Definition_20_Term" - ds = if isTightList d - then "Definition_20_Definition_20_Tight" else "Definition_20_Definition" - t' <- withParagraphStyle o ts [Para t] - d' <- liftM vcat $ mapM (withParagraphStyle o ds . (map plainToPara)) d - return $ t' $$ d' - -inBlockQuote :: PandocMonad m - => WriterOptions -> Int -> [Block] -> OD m Doc -inBlockQuote o i (b:bs) - | BlockQuote l <- b = do increaseIndent - ni <- paraStyle - [("style:parent-style-name","Quotations")] - go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l - | otherwise = do go =<< blockToOpenDocument o b - where go block = ($$) block <$> inBlockQuote o i bs -inBlockQuote _ _ [] = resetIndent >> return empty - --- | Convert a list of Pandoc blocks to OpenDocument. -blocksToOpenDocument :: PandocMonad m => WriterOptions -> [Block] -> OD m Doc -blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b - --- | Convert a Pandoc block element to OpenDocument. -blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m Doc -blockToOpenDocument o bs - | Plain b <- bs = if null b - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs - = figure attr c s t - | Para b <- bs = if null b - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div _ xs <- bs = blocksToOpenDocument o xs - | Header i _ b <- bs = setFirstPara >> - (inHeaderTags i =<< inlinesToOpenDocument o b) - | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b - | DefinitionList b <- bs = setFirstPara >> defList b - | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b - | OrderedList a b <- bs = setFirstPara >> orderedList a b - | CodeBlock _ s <- bs = setFirstPara >> preformatted s - | Table c a w h r <- bs = setFirstPara >> table c a w h r - | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" - [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == Format "opendocument" - then return $ text s - else do - report $ BlockNotRendered bs - return empty - | Null <- bs = return empty - | otherwise = return empty - where - defList b = do setInDefinitionList True - r <- vcat <$> mapM (deflistItemToOpenDocument o) b - setInDefinitionList False - return r - preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) - mkBlockQuote b = do increaseIndent - i <- paraStyle - [("style:parent-style-name","Quotations")] - inBlockQuote o i (map plainToPara b) - orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a - inTags True "text:list" [ ("text:style-name", "L" ++ show ln)] - <$> orderedListToOpenDocument o pn b - table c a w h r = do - tn <- length <$> gets stTableStyles - pn <- length <$> gets stParaStyles - let genIds = map chr [65..] - name = "Table" ++ show (tn + 1) - columnIds = zip genIds w - mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])] - columns = map mkColumn columnIds - paraHStyles = paraTableStyles "Heading" pn a - paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a - newPara = map snd . filter (not . isEmpty . snd) - addTableStyle $ tableStyle tn columnIds - mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles - captionDoc <- if null c - then return empty - else withParagraphStyle o "Table" [Para c] - th <- if all null h - then return empty - else colHeadsToOpenDocument o name (map fst paraHStyles) h - tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r - return $ inTags True "table:table" [ ("table:name" , name) - , ("table:style-name", name) - ] (vcat columns $$ th $$ vcat tr) $$ captionDoc - figure attr caption source title | null caption = - withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]] - | otherwise = do - imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]] - captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] - return $ imageDoc $$ captionDoc - -colHeadsToOpenDocument :: PandocMonad m - => WriterOptions -> String -> [String] -> [[Block]] - -> OD m Doc -colHeadsToOpenDocument o tn ns hs = - inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns hs) - -tableRowToOpenDocument :: PandocMonad m - => WriterOptions -> String -> [String] -> [[Block]] - -> OD m Doc -tableRowToOpenDocument o tn ns cs = - inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns cs) - -tableItemToOpenDocument :: PandocMonad m - => WriterOptions -> String -> (String,[Block]) - -> OD m Doc -tableItemToOpenDocument o tn (n,i) = - let a = [ ("table:style-name" , tn ++ ".A1" ) - , ("office:value-type", "string" ) - ] - in inTags True "table:table-cell" a <$> - withParagraphStyle o n (map plainToPara i) - --- | Convert a list of inline elements to OpenDocument. -inlinesToOpenDocument :: PandocMonad m => WriterOptions -> [Inline] -> OD m Doc -inlinesToOpenDocument o l = hcat <$> toChunks o l - -toChunks :: PandocMonad m => WriterOptions -> [Inline] -> OD m [Doc] -toChunks _ [] = return [] -toChunks o (x : xs) - | isChunkable x = do - contents <- (inTextStyle . hcat) =<< - mapM (inlineToOpenDocument o) (x:ys) - rest <- toChunks o zs - return (contents : rest) - | otherwise = do - contents <- inlineToOpenDocument o x - rest <- toChunks o xs - return (contents : rest) - where (ys, zs) = span isChunkable xs - -isChunkable :: Inline -> Bool -isChunkable (Str _) = True -isChunkable Space = True -isChunkable SoftBreak = True -isChunkable _ = False - --- | Convert an inline element to OpenDocument. -inlineToOpenDocument :: PandocMonad m => WriterOptions -> Inline -> OD m Doc -inlineToOpenDocument o ils - = case ils of - Space -> return space - SoftBreak - | writerWrapText o == WrapPreserve - -> return $ preformatted "\n" - | otherwise -> return $ space - Span _ xs -> inlinesToOpenDocument o xs - LineBreak -> return $ selfClosingTag "text:line-break" [] - Str s -> return $ handleSpaces $ escapeStringForXML s - Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l - Strong l -> withTextStyle Bold $ inlinesToOpenDocument o l - Strikeout l -> withTextStyle Strike $ inlinesToOpenDocument o l - Superscript l -> withTextStyle Sup $ inlinesToOpenDocument o l - Subscript l -> withTextStyle Sub $ inlinesToOpenDocument o l - SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l - Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l - Code _ s -> inlinedCode $ preformatted s - Math t s -> lift (texMathToInlines t s) >>= - inlinesToOpenDocument o - Cite _ l -> inlinesToOpenDocument o l - RawInline f s -> if f == Format "opendocument" - then return $ text s - else do - report $ InlineNotRendered ils - return empty - Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l - Image attr _ (s,t) -> mkImg attr s t - Note l -> mkNote l - where - preformatted s = handleSpaces $ escapeStringForXML s - inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s - mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") - , ("xlink:href" , s ) - , ("office:name", t ) - ] . inSpanTags "Definition" - mkImg (_, _, kvs) s _ = do - id' <- gets stImageId - modify (\st -> st{ stImageId = id' + 1 }) - let getDims [] = [] - getDims (("width", w) :xs) = ("svg:width", w) : getDims xs - getDims (("height", h):xs) = ("svg:height", h) : getDims xs - getDims (x@("style:rel-width", _) :xs) = x : getDims xs - getDims (x@("style:rel-height", _):xs) = x : getDims xs - getDims (_:xs) = getDims xs - return $ inTags False "draw:frame" - (("draw:name", "img" ++ show id') : getDims kvs) $ - selfClosingTag "draw:image" [ ("xlink:href" , s ) - , ("xlink:type" , "simple") - , ("xlink:show" , "embed" ) - , ("xlink:actuate", "onLoad")] - mkNote l = do - n <- length <$> gets stNotes - let footNote t = inTags False "text:note" - [ ("text:id" , "ftn" ++ show n) - , ("text:note-class", "footnote" )] $ - inTagsSimple "text:note-citation" (text . show $ n + 1) <> - inTagsSimple "text:note-body" t - nn <- footNote <$> withParagraphStyle o "Footnote" l - addNote nn - return nn - -bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc])) -bulletListStyle l = do - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,8227,8259] - listElStyle = map doStyles [0..9] - pn <- paraListStyle l - return (pn, (l, listElStyle)) - -orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) -orderedListLevelStyle (s,n, d) (l,ls) = - let suffix = case d of - OneParen -> [("style:num-suffix", ")")] - TwoParens -> [("style:num-prefix", "(") - ,("style:num-suffix", ")")] - _ -> [("style:num-suffix", ".")] - format = case n of - UpperAlpha -> "A" - LowerAlpha -> "a" - UpperRoman -> "I" - LowerRoman -> "i" - _ -> "1" - listStyle = inTags True "text:list-level-style-number" - ([ ("text:level" , show $ 1 + length ls ) - , ("text:style-name" , "Numbering_20_Symbols") - , ("style:num-format", format ) - , ("text:start-value", show s ) - ] ++ suffix) (listLevelStyle (1 + length ls)) - in (l, ls ++ [listStyle]) - -listLevelStyle :: Int -> Doc -listLevelStyle i = - let indent = show (0.25 * fromIntegral i :: Double) in - selfClosingTag "style:list-level-properties" - [ ("text:space-before" , indent ++ "in") - , ("text:min-label-width", "0.25in")] - -tableStyle :: Int -> [(Char,Double)] -> Doc -tableStyle num wcs = - let tableId = "Table" ++ show (num + 1) - table = inTags True "style:style" - [("style:name", tableId) - ,("style:family", "table")] $ - selfClosingTag "style:table-properties" - [("table:align" , "center")] - colStyle (c,0) = selfClosingTag "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) - , ("style:family", "table-column" )] - colStyle (c,w) = inTags True "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) - , ("style:family", "table-column" )] $ - selfClosingTag "style:table-column-properties" - [("style:rel-column-width", printf "%d*" $ (floor $ w * 65535 :: Integer))] - cellStyle = inTags True "style:style" - [ ("style:name" , tableId ++ ".A1") - , ("style:family", "table-cell" )] $ - selfClosingTag "style:table-cell-properties" - [ ("fo:border", "none")] - columnStyles = map colStyle wcs - in table $$ vcat columnStyles $$ cellStyle - -paraStyle :: PandocMonad m => [(String,String)] -> OD m Int -paraStyle attrs = do - pn <- (+) 1 . length <$> gets stParaStyles - i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara - b <- gets stInDefinition - t <- gets stTight - let styleAttr = [ ("style:name" , "P" ++ show pn) - , ("style:family" , "paragraph" )] - indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i - tight = if t then [ ("fo:margin-top" , "0in" ) - , ("fo:margin-bottom" , "0in" )] - else [] - indent = if (i /= 0 || b) - then [ ("fo:margin-left" , indentVal) - , ("fo:margin-right" , "0in" ) - , ("fo:text-indent" , "0in" ) - , ("style:auto-text-indent" , "false" )] - else [] - attributes = indent ++ tight - paraProps = when (not $ null attributes) $ - selfClosingTag "style:paragraph-properties" attributes - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps - return pn - -paraListStyle :: PandocMonad m => Int -> OD m Int -paraListStyle l = paraStyle - [("style:parent-style-name","Text_20_body") - ,("style:list-style-name", "L" ++ show l )] - -paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)] -paraTableStyles _ _ [] = [] -paraTableStyles t s (a:xs) - | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs - | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs - | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs - where pName sn = "P" ++ show (sn + 1) - res sn x = inTags True "style:style" - [ ("style:name" , pName sn ) - , ("style:family" , "paragraph" ) - , ("style:parent-style-name", "Table_20_" ++ t)] $ - selfClosingTag "style:paragraph-properties" - [ ("fo:text-align", x) - , ("style:justify-single-word", "false")] - -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre - deriving ( Eq,Ord ) - -textStyleAttr :: TextStyle -> [(String,String)] -textStyleAttr s - | Italic <- s = [("fo:font-style" ,"italic" ) - ,("style:font-style-asian" ,"italic" ) - ,("style:font-style-complex" ,"italic" )] - | Bold <- s = [("fo:font-weight" ,"bold" ) - ,("style:font-weight-asian" ,"bold" ) - ,("style:font-weight-complex" ,"bold" )] - | Strike <- s = [("style:text-line-through-style", "solid" )] - | Sub <- s = [("style:text-position" ,"sub 58%" )] - | Sup <- s = [("style:text-position" ,"super 58%" )] - | SmallC <- s = [("fo:font-variant" ,"small-caps")] - | Pre <- s = [("style:font-name" ,"Courier New") - ,("style:font-name-asian" ,"Courier New") - ,("style:font-name-complex" ,"Courier New")] - | otherwise = [] diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs deleted file mode 100644 index 55d3fe656..000000000 --- a/src/Text/Pandoc/Writers/Org.hs +++ /dev/null @@ -1,411 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>, - and John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010-2015 Puneeth Chaganti and John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : Puneeth Chaganti <punchagan@gmail.com> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Emacs Org-Mode. - -Org-Mode: <http://orgmode.org> --} -module Text.Pandoc.Writers.Org ( writeOrg) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Pretty -import Text.Pandoc.Templates (renderTemplate') -import Data.Char ( isAlphaNum, toLower ) -import Data.List ( isPrefixOf, intersect, intersperse, partition, transpose ) -import Control.Monad.State -import Text.Pandoc.Class (PandocMonad) - -data WriterState = - WriterState { stNotes :: [[Block]] - , stHasMath :: Bool - , stOptions :: WriterOptions - } - --- | Convert Pandoc to Org. -writeOrg :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeOrg opts document = return $ - let st = WriterState { stNotes = [], - stHasMath = False, - stOptions = opts } - in evalState (pandocToOrg document) st - --- | Return Org representation of document. -pandocToOrg :: Pandoc -> State WriterState String -pandocToOrg (Pandoc meta blocks) = do - opts <- liftM stOptions get - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToOrg) - (fmap (render colwidth) . inlineListToOrg) - meta - body <- blockListToOrg blocks - notes <- liftM (reverse . stNotes) get >>= notesToOrg - hasMath <- liftM stHasMath get - let main = render colwidth $ foldl ($+$) empty $ [body, notes] - let context = defField "body" main - $ defField "math" hasMath - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - --- | Return Org representation of notes. -notesToOrg :: [[Block]] -> State WriterState Doc -notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= - return . vsep - --- | Return Org representation of a note. -noteToOrg :: Int -> [Block] -> State WriterState Doc -noteToOrg num note = do - contents <- blockListToOrg note - let marker = "[fn:" ++ show num ++ "] " - return $ hang (length marker) (text marker) contents - --- | Escape special characters for Org. -escapeString :: String -> String -escapeString = escapeStringUsing $ - [ ('\x2014',"---") - , ('\x2013',"--") - , ('\x2019',"'") - , ('\x2026',"...") - ] ++ backslashEscapes "^_" - -isRawFormat :: Format -> Bool -isRawFormat f = - f == Format "latex" || f == Format "tex" || f == Format "org" - --- | Convert Pandoc block element to Org. -blockToOrg :: Block -- ^ Block element - -> State WriterState Doc -blockToOrg Null = return empty -blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do - contents <- blockListToOrg bs - let drawerNameTag = ":" <> text cls <> ":" - let keys = vcat $ map (\(k,v) -> - ":" <> text k <> ":" - <> space <> text v) kvs - let drawerEndTag = text ":END:" - return $ drawerNameTag $$ cr $$ keys $$ - blankline $$ contents $$ - blankline $$ drawerEndTag $$ - blankline -blockToOrg (Div attrs bs) = do - contents <- blockListToOrg bs - let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower - return $ case attrs of - ("", [], []) -> - -- nullAttr, treat contents as if it wasn't wrapped - blankline $$ contents $$ blankline - (ident, [], []) -> - -- only an id: add id as an anchor, unwrap the rest - blankline $$ "<<" <> text ident <> ">>" $$ contents $$ blankline - (ident, classes, kv) -> - -- if one class looks like the name of a greater block then output as - -- such: The ID, if present, is added via the #+NAME keyword; other - -- classes and key-value pairs are kept as #+ATTR_HTML attributes. - let - (blockTypeCand, classes') = partition isGreaterBlockClass classes - in case blockTypeCand of - (blockType:classes'') -> - blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ - "#+BEGIN_" <> text blockType $$ contents $$ - "#+END_" <> text blockType $$ blankline - _ -> - -- fallback: wrap in div tags - let - startTag = tagWithAttrs "div" attrs - endTag = text "</div>" - in blankline $$ "#+BEGIN_HTML" $$ - nest 2 startTag $$ "#+END_HTML" $$ blankline $$ - contents $$ blankline $$ "#+BEGIN_HTML" $$ - nest 2 endTag $$ "#+END_HTML" $$ blankline -blockToOrg (Plain inlines) = inlineListToOrg inlines --- title beginning with fig: indicates that the image is a figure -blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt - img <- inlineToOrg (Image attr txt (src,tit)) - return $ capt $$ img $$ blankline -blockToOrg (Para inlines) = do - contents <- inlineListToOrg inlines - return $ contents <> blankline -blockToOrg (LineBlock lns) = do - let splitStanza [] = [] - splitStanza xs = case break (== mempty) xs of - (l, []) -> l : [] - (l, _:r) -> l : splitStanza r - let joinWithLinefeeds = nowrap . mconcat . intersperse cr - let joinWithBlankLines = mconcat . intersperse blankline - let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToOrg ls - contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns) - return $ blankline $$ "#+BEGIN_VERSE" $$ - nest 2 contents $$ "#+END_VERSE" <> blankline -blockToOrg (RawBlock "html" str) = - return $ blankline $$ "#+BEGIN_HTML" $$ - nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | isRawFormat f = - return $ text str -blockToOrg (RawBlock _ _) = return empty -blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline -blockToOrg (Header level attr inlines) = do - contents <- inlineListToOrg inlines - let headerStr = text $ if level > 999 then " " else replicate level '*' - let drawerStr = if attr == nullAttr - then empty - else cr <> nest (level + 1) (propertiesDrawer attr) - return $ headerStr <> " " <> contents <> drawerStr <> blankline -blockToOrg (CodeBlock (_,classes,_) str) = do - opts <- stOptions <$> get - let tabstop = writerTabStop opts - let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers - let (beg, end) = case at of - [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") - (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC") - return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline -blockToOrg (BlockQuote blocks) = do - contents <- blockListToOrg blocks - return $ blankline $$ "#+BEGIN_QUOTE" $$ - nest 2 contents $$ "#+END_QUOTE" $$ blankline -blockToOrg (Table caption' _ _ headers rows) = do - caption'' <- inlineListToOrg caption' - let caption = if null caption' - then empty - else ("#+CAPTION: " <> caption'') - headers' <- mapM blockListToOrg headers - rawRows <- mapM (mapM blockListToOrg) rows - let numChars = maximum . map offset - -- FIXME: width is not being used. - let widthsInChars = - map ((+2) . numChars) $ transpose (headers' : rawRows) - -- FIXME: Org doesn't allow blocks with height more than 1. - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = maximum (1 : map height blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToOrg row - return $ makeRow cols) rows - let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '|' - let body = vcat rows' - let head'' = if all null headers - then empty - else head' $$ border '-' - return $ head'' $$ body $$ caption $$ blankline -blockToOrg (BulletList items) = do - contents <- mapM bulletListItemToOrg items - -- ensure that sublists have preceding blank line - return $ blankline $+$ vcat contents $$ blankline -blockToOrg (OrderedList (start, _, delim) items) = do - let delim' = case delim of - TwoParens -> OneParen - x -> x - let markers = take (length items) $ orderedListMarkers - (start, Decimal, delim') - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $ - zip markers' items - -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline -blockToOrg (DefinitionList items) = do - contents <- mapM definitionListItemToOrg items - return $ vcat contents $$ blankline - --- | Convert bullet list item (list of blocks) to Org. -bulletListItemToOrg :: [Block] -> State WriterState Doc -bulletListItemToOrg items = do - contents <- blockListToOrg items - return $ hang 2 "- " (contents <> cr) - --- | Convert ordered list item (a list of blocks) to Org. -orderedListItemToOrg :: String -- ^ marker for list item - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToOrg marker items = do - contents <- blockListToOrg items - return $ hang (length marker + 1) (text marker <> space) (contents <> cr) - --- | Convert defintion list item (label, list of blocks) to Org. -definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc -definitionListItemToOrg (label, defs) = do - label' <- inlineListToOrg label - contents <- liftM vcat $ mapM blockListToOrg defs - return $ hang 2 "- " $ label' <> " :: " <> (contents <> cr) - --- | Convert list of key/value pairs to Org :PROPERTIES: drawer. -propertiesDrawer :: Attr -> Doc -propertiesDrawer (ident, classes, kv) = - let - drawerStart = text ":PROPERTIES:" - drawerEnd = text ":END:" - kv' = if (classes == mempty) then kv else ("CLASS", unwords classes):kv - kv'' = if (ident == mempty) then kv' else ("CUSTOM_ID", ident):kv' - properties = vcat $ map kvToOrgProperty kv'' - in - drawerStart <> cr <> properties <> cr <> drawerEnd - where - kvToOrgProperty :: (String, String) -> Doc - kvToOrgProperty (key, value) = - text ":" <> text key <> text ": " <> text value <> cr - -attrHtml :: Attr -> Doc -attrHtml ("" , [] , []) = mempty -attrHtml (ident, classes, kvs) = - let - name = if (null ident) then mempty else "#+NAME: " <> text ident <> cr - keyword = "#+ATTR_HTML" - classKv = ("class", unwords classes) - kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) - in name <> keyword <> ": " <> text (unwords kvStrings) <> cr - --- | Convert list of Pandoc block elements to Org. -blockListToOrg :: [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to Org. -inlineListToOrg :: [Inline] -> State WriterState Doc -inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat - --- | Convert Pandoc inline element to Org. -inlineToOrg :: Inline -> State WriterState Doc -inlineToOrg (Span (uid, [], []) []) = - return $ "<<" <> text uid <> ">>" -inlineToOrg (Span _ lst) = - inlineListToOrg lst -inlineToOrg (Emph lst) = do - contents <- inlineListToOrg lst - return $ "/" <> contents <> "/" -inlineToOrg (Strong lst) = do - contents <- inlineListToOrg lst - return $ "*" <> contents <> "*" -inlineToOrg (Strikeout lst) = do - contents <- inlineListToOrg lst - return $ "+" <> contents <> "+" -inlineToOrg (Superscript lst) = do - contents <- inlineListToOrg lst - return $ "^{" <> contents <> "}" -inlineToOrg (Subscript lst) = do - contents <- inlineListToOrg lst - return $ "_{" <> contents <> "}" -inlineToOrg (SmallCaps lst) = inlineListToOrg lst -inlineToOrg (Quoted SingleQuote lst) = do - contents <- inlineListToOrg lst - return $ "'" <> contents <> "'" -inlineToOrg (Quoted DoubleQuote lst) = do - contents <- inlineListToOrg lst - return $ "\"" <> contents <> "\"" -inlineToOrg (Cite _ lst) = inlineListToOrg lst -inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return $ text $ escapeString str -inlineToOrg (Math t str) = do - modify $ \st -> st{ stHasMath = True } - return $ if t == InlineMath - then "$" <> text str <> "$" - else "$$" <> text str <> "$$" -inlineToOrg (RawInline f@(Format f') str) = - return $ if isRawFormat f - then text str - else "@@" <> text f' <> ":" <> text str <> "@@" -inlineToOrg LineBreak = return (text "\\\\" <> cr) -inlineToOrg Space = return space -inlineToOrg SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space -inlineToOrg (Link _ txt (src, _)) = do - case txt of - [Str x] | escapeURI x == src -> -- autolink - do return $ "[[" <> text (orgPath x) <> "]]" - _ -> do contents <- inlineListToOrg txt - return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]" -inlineToOrg (Image _ _ (source, _)) = do - return $ "[[" <> text (orgPath source) <> "]]" -inlineToOrg (Note contents) = do - -- add to notes in state - notes <- get >>= (return . stNotes) - modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 - return $ "[fn:" <> text ref <> "]" - -orgPath :: String -> String -orgPath src = - case src of - [] -> mempty -- wiki link - ('#':_) -> src -- internal link - _ | isUrl src -> src - _ | isFilePath src -> src - _ -> "file:" <> src - where - isFilePath :: String -> Bool - isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"] - - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) - --- | Translate from pandoc's programming language identifiers to those used by --- org-mode. -pandocLangToOrg :: String -> String -pandocLangToOrg cs = - case cs of - "c" -> "C" - "cpp" -> "C++" - "commonlisp" -> "lisp" - "r" -> "R" - "bash" -> "sh" - _ -> cs - --- | List of language identifiers recognized by org-mode. -orgLangIdentifiers :: [String] -orgLangIdentifiers = - [ "asymptote", "awk", "C", "C++", "clojure", "css", "d", "ditaa", "dot" - , "calc", "emacs-lisp", "fortran", "gnuplot", "haskell", "java", "js" - , "latex", "ledger", "lisp", "lilypond", "matlab", "mscgen", "ocaml" - , "octave", "org", "oz", "perl", "plantuml", "processing", "python", "R" - , "ruby", "sass", "scheme", "screen", "sed", "sh", "sql", "sqlite" - ] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs deleted file mode 100644 index 5cce64d17..000000000 --- a/src/Text/Pandoc/Writers/RST.hs +++ /dev/null @@ -1,556 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to reStructuredText. - -reStructuredText: <http://docutils.sourceforge.net/rst.html> --} -module Text.Pandoc.Writers.RST ( writeRST ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Builder (deleteMeta) -import Data.Maybe (fromMaybe) -import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose ) -import Network.URI (isURI) -import Text.Pandoc.Pretty -import Control.Monad.State -import Data.Char (isSpace, toLower) -import Text.Pandoc.Class (PandocMonad) - -type Refs = [([Inline], Target)] - -data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Refs - , stImages :: [([Inline], (Attr, String, String, Maybe String))] - , stHasMath :: Bool - , stHasRawTeX :: Bool - , stOptions :: WriterOptions - , stTopLevel :: Bool - } - --- | Convert Pandoc to RST. -writeRST :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeRST opts document = return $ - let st = WriterState { stNotes = [], stLinks = [], - stImages = [], stHasMath = False, - stHasRawTeX = False, stOptions = opts, - stTopLevel = True} - in evalState (pandocToRST document) st - --- | Return RST representation of document. -pandocToRST :: Pandoc -> State WriterState String -pandocToRST (Pandoc meta blocks) = do - opts <- liftM stOptions get - let colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - let subtit = case lookupMeta "subtitle" meta of - Just (MetaBlocks [Plain xs]) -> xs - _ -> [] - title <- titleToRST (docTitle meta) subtit - metadata <- metaToJSON opts - (fmap (render colwidth) . blockListToRST) - (fmap (trimr . render colwidth) . inlineListToRST) - $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST' True $ case writerTemplate opts of - Just _ -> normalizeHeadings 1 blocks - Nothing -> blocks - notes <- liftM (reverse . stNotes) get >>= notesToRST - -- note that the notes may contain refs, so we do them first - refs <- liftM (reverse . stLinks) get >>= refsToRST - pics <- liftM (reverse . stImages) get >>= pictRefsToRST - hasMath <- liftM stHasMath get - rawTeX <- liftM stHasRawTeX get - let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] - let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (show $ writerTOCDepth opts) - $ defField "math" hasMath - $ defField "title" (render Nothing title :: String) - $ defField "math" hasMath - $ defField "rawtex" rawTeX - $ metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - where - normalizeHeadings lev (Header l a i:bs) = - Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' - where (cont,bs') = break (headerLtEq l) bs - headerLtEq level (Header l' _ _) = l' <= level - headerLtEq _ _ = False - normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs - normalizeHeadings _ [] = [] - --- | Return RST representation of reference key table. -refsToRST :: Refs -> State WriterState Doc -refsToRST refs = mapM keyToRST refs >>= return . vcat - --- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) - -> State WriterState Doc -keyToRST (label, (src, _)) = do - label' <- inlineListToRST label - let label'' = if ':' `elem` ((render Nothing label') :: String) - then char '`' <> label' <> char '`' - else label' - return $ nowrap $ ".. _" <> label'' <> ": " <> text src - --- | Return RST representation of notes. -notesToRST :: [[Block]] -> State WriterState Doc -notesToRST notes = - mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= - return . vsep - --- | Return RST representation of a note. -noteToRST :: Int -> [Block] -> State WriterState Doc -noteToRST num note = do - contents <- blockListToRST note - let marker = ".. [" <> text (show num) <> "]" - return $ nowrap $ marker $$ nest 3 contents - --- | Return RST representation of picture reference table. -pictRefsToRST :: [([Inline], (Attr, String, String, Maybe String))] - -> State WriterState Doc -pictRefsToRST refs = mapM pictToRST refs >>= return . vcat - --- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (Attr, String, String, Maybe String)) - -> State WriterState Doc -pictToRST (label, (attr, src, _, mbtarget)) = do - label' <- inlineListToRST label - dims <- imageDimsToRST attr - let (_, cls, _) = attr - classes = if null cls - then empty - else ":class: " <> text (unwords cls) - return $ nowrap - $ ".. |" <> label' <> "| image:: " <> text src $$ hang 3 empty (classes $$ dims) - $$ case mbtarget of - Nothing -> empty - Just t -> " :target: " <> text t - --- | Escape special characters for RST. -escapeString :: WriterOptions -> String -> String -escapeString _ [] = [] -escapeString opts (c:cs) = - case c of - _ | c `elem` ['\\','`','*','_','|'] -> '\\':c:escapeString opts cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString opts cs - _ -> '-':escapeString opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString opts rest - _ -> '.':escapeString opts cs - _ -> c : escapeString opts cs - -titleToRST :: [Inline] -> [Inline] -> State WriterState Doc -titleToRST [] _ = return empty -titleToRST tit subtit = do - title <- inlineListToRST tit - subtitle <- inlineListToRST subtit - return $ bordered title '=' $$ bordered subtitle '-' - -bordered :: Doc -> Char -> Doc -bordered contents c = - if len > 0 - then border $$ contents $$ border - else empty - where len = offset contents - border = text (replicate len c) - --- | Convert Pandoc block element to RST. -blockToRST :: Block -- ^ Block element - -> State WriterState Doc -blockToRST Null = return empty -blockToRST (Div attr bs) = do - contents <- blockListToRST bs - let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr) - let endTag = ".. raw:: html" $+$ nest 3 "</div>" - return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline -blockToRST (Plain inlines) = inlineListToRST inlines --- title beginning with fig: indicates that the image is a figure -blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- inlineListToRST txt - dims <- imageDimsToRST attr - let fig = "figure:: " <> text src - alt = ":alt: " <> if null tit then capt else text tit - (_,cls,_) = attr - classes = if null cls - then empty - else ":figclass: " <> text (unwords cls) - return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline -blockToRST (Para inlines) - | LineBreak `elem` inlines = do -- use line block if LineBreaks - linesToLineBlock $ splitBy (==LineBreak) inlines - | otherwise = do - contents <- inlineListToRST inlines - return $ contents <> blankline -blockToRST (LineBlock lns) = - linesToLineBlock lns -blockToRST (RawBlock f@(Format f') str) - | f == "rst" = return $ text str - | otherwise = return $ blankline <> ".. raw:: " <> - text (map toLower f') $+$ - (nest 3 $ text str) $$ blankline -blockToRST HorizontalRule = - return $ blankline $$ "--------------" $$ blankline -blockToRST (Header level (name,classes,_) inlines) = do - contents <- inlineListToRST inlines - isTopLevel <- gets stTopLevel - if isTopLevel - then do - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline - else do - let rub = "rubric:: " <> contents - let name' | null name = empty - | otherwise = ":name: " <> text name - let cls | null classes = empty - | otherwise = ":class: " <> text (unwords classes) - return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline -blockToRST (CodeBlock (_,classes,kvs) str) = do - opts <- stOptions <$> get - let tabstop = writerTabStop opts - let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs - let numberlines = if "numberLines" `elem` classes - then " :number-lines:" <> startnum - else empty - if "haskell" `elem` classes && "literate" `elem` classes && - isEnabled Ext_literate_haskell opts - then return $ prefixed "> " (text str) $$ blankline - else return $ - (case [c | c <- classes, - c `notElem` ["sourceCode","literate","numberLines"]] of - [] -> "::" - (lang:_) -> (".. code:: " <> text lang) $$ numberlines) - $+$ nest tabstop (text str) $$ blankline -blockToRST (BlockQuote blocks) = do - tabstop <- get >>= (return . writerTabStop . stOptions) - contents <- blockListToRST blocks - return $ (nest tabstop contents) <> blankline -blockToRST (Table caption _ widths headers rows) = do - caption' <- inlineListToRST caption - headers' <- mapM blockListToRST headers - rawRows <- mapM (mapM blockListToRST) rows - -- let isSimpleCell [Plain _] = True - -- isSimpleCell [Para _] = True - -- isSimpleCell [] = True - -- isSimpleCell _ = False - -- let isSimple = all (==0) widths && all (all isSimpleCell) rows - let numChars = maximum . map offset - opts <- get >>= return . stOptions - let widthsInChars = - if all (== 0) widths - then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (fromIntegral (writerColumns opts) *)) widths - let hpipeBlocks blocks = hcat [beg, middle, end] - where h = height (hcat blocks) - sep' = lblock 3 $ vcat (map text $ replicate h " | ") - beg = lblock 2 $ vcat (map text $ replicate h "| ") - end = lblock 2 $ vcat (map text $ replicate h " |") - middle = hcat $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith lblock widthsInChars - let head' = makeRow headers' - let rows' = map makeRow rawRows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') rows' - let head'' = if all null headers - then empty - else head' $$ border '=' - let tbl = border '-' $$ head'' $$ body $$ border '-' - return $ if null caption - then tbl $$ blankline - else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$ - blankline -blockToRST (BulletList items) = do - contents <- mapM bulletListItemToRST items - -- ensure that sublists have preceding blank line - return $ blankline $$ chomp (vcat contents) $$ blankline -blockToRST (OrderedList (start, style', delim) items) = do - let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim - then take (length items) $ repeat "#." - else take (length items) $ orderedListMarkers - (start, style', delim) - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ - zip markers' items - -- ensure that sublists have preceding blank line - return $ blankline $$ chomp (vcat contents) $$ blankline -blockToRST (DefinitionList items) = do - contents <- mapM definitionListItemToRST items - -- ensure that sublists have preceding blank line - return $ blankline $$ chomp (vcat contents) $$ blankline - --- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: [Block] -> State WriterState Doc -bulletListItemToRST items = do - contents <- blockListToRST items - return $ hang 3 "- " $ contents <> cr - --- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: String -- ^ marker for list item - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToRST marker items = do - contents <- blockListToRST items - let marker' = marker ++ " " - return $ hang (length marker') (text marker') $ contents <> cr - --- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc -definitionListItemToRST (label, defs) = do - label' <- inlineListToRST label - contents <- liftM vcat $ mapM blockListToRST defs - tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (nestle contents <> cr) - --- | Format a list of lines as line block. -linesToLineBlock :: [[Inline]] -> State WriterState Doc -linesToLineBlock inlineLines = do - lns <- mapM inlineListToRST inlineLines - return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline - --- | Convert list of Pandoc block elements to RST. -blockListToRST' :: Bool - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToRST' topLevel blocks = do - tl <- gets stTopLevel - modify (\s->s{stTopLevel=topLevel}) - res <- vcat `fmap` mapM blockToRST blocks - modify (\s->s{stTopLevel=tl}) - return res - -blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToRST = blockListToRST' False - --- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = - mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= - return . hcat - where -- remove spaces after displaymath, as they screw up indentation: - removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = - Math DisplayMath x : dropWhile (==Space) zs - removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs - removeSpaceAfterDisplayMath [] = [] - insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed - insertBS (x:y:z:zs) - | isComplex y && (surroundComplex x z) = - x : y : insertBS (z : zs) - insertBS (x:y:zs) - | isComplex x && not (okAfterComplex y) = - x : RawInline "rst" "\\ " : insertBS (y : zs) - | isComplex y && not (okBeforeComplex x) = - x : RawInline "rst" "\\ " : insertBS (y : zs) - | otherwise = - x : insertBS (y : zs) - insertBS (x:ys) = x : insertBS ys - insertBS [] = [] - surroundComplex :: Inline -> Inline -> Bool - surroundComplex (Str s@(_:_)) (Str s'@(_:_)) = - case (last s, head s') of - ('\'','\'') -> True - ('"','"') -> True - ('<','>') -> True - ('[',']') -> True - ('{','}') -> True - _ -> False - surroundComplex _ _ = False - okAfterComplex :: Inline -> Bool - okAfterComplex Space = True - okAfterComplex SoftBreak = True - okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) - okAfterComplex _ = False - okBeforeComplex :: Inline -> Bool - okBeforeComplex Space = True - okBeforeComplex SoftBreak = True - okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) - okBeforeComplex _ = False - isComplex :: Inline -> Bool - isComplex (Emph _) = True - isComplex (Strong _) = True - isComplex (SmallCaps _) = True - isComplex (Strikeout _) = True - isComplex (Superscript _) = True - isComplex (Subscript _) = True - isComplex (Link _ _ _) = True - isComplex (Image _ _ _) = True - isComplex (Code _ _) = True - isComplex (Math _ _) = True - isComplex (Cite _ (x:_)) = isComplex x - isComplex (Span _ (x:_)) = isComplex x - isComplex _ = False - --- | Convert Pandoc inline element to RST. -inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Span _ ils) = inlineListToRST ils -inlineToRST (Emph lst) = do - contents <- inlineListToRST lst - return $ "*" <> contents <> "*" -inlineToRST (Strong lst) = do - contents <- inlineListToRST lst - return $ "**" <> contents <> "**" -inlineToRST (Strikeout lst) = do - contents <- inlineListToRST lst - return $ "[STRIKEOUT:" <> contents <> "]" -inlineToRST (Superscript lst) = do - contents <- inlineListToRST lst - return $ ":sup:`" <> contents <> "`" -inlineToRST (Subscript lst) = do - contents <- inlineListToRST lst - return $ ":sub:`" <> contents <> "`" -inlineToRST (SmallCaps lst) = inlineListToRST lst -inlineToRST (Quoted SingleQuote lst) = do - contents <- inlineListToRST lst - opts <- gets stOptions - if isEnabled Ext_smart opts - then return $ "'" <> contents <> "'" - else return $ "‘" <> contents <> "’" -inlineToRST (Quoted DoubleQuote lst) = do - contents <- inlineListToRST lst - opts <- gets stOptions - if isEnabled Ext_smart opts - then return $ "\"" <> contents <> "\"" - else return $ "“" <> contents <> "”" -inlineToRST (Cite _ lst) = - inlineListToRST lst -inlineToRST (Code _ str) = return $ "``" <> text str <> "``" -inlineToRST (Str str) = do - opts <- gets stOptions - return $ text $ - (if isEnabled Ext_smart opts - then unsmartify opts - else id) $ escapeString opts str -inlineToRST (Math t str) = do - modify $ \st -> st{ stHasMath = True } - return $ if t == InlineMath - then ":math:`" <> text str <> "`" - else if '\n' `elem` str - then blankline $$ ".. math::" $$ - blankline $$ nest 3 (text str) $$ blankline - else blankline $$ (".. math:: " <> text str) $$ blankline -inlineToRST (RawInline f x) - | f == "rst" = return $ text x - | f == "latex" || f == "tex" = do - modify $ \st -> st{ stHasRawTeX = True } - return $ ":raw-latex:`" <> text x <> "`" - | otherwise = return empty -inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) -inlineToRST Space = return space -inlineToRST SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - case wrapText of - WrapPreserve -> return cr - WrapAuto -> return space - WrapNone -> return space --- autolink -inlineToRST (Link _ [Str str] (src, _)) - | isURI src && - if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) - else src == escapeURI str = do - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - return $ text srcSuffix -inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do - label <- registerImage attr alt (imgsrc,imgtit) (Just src) - return $ "|" <> label <> "|" -inlineToRST (Link _ txt (src, tit)) = do - useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions - linktext <- inlineListToRST $ normalizeSpaces txt - if useReferenceLinks - then do refs <- get >>= return . stLinks - case lookup txt refs of - Just (src',tit') -> - if src == src' && tit == tit' - then return $ "`" <> linktext <> "`_" - else do -- duplicate label, use non-reference link - return $ "`" <> linktext <> " <" <> text src <> ">`__" - Nothing -> do - modify $ \st -> st { stLinks = (txt,(src,tit)):refs } - return $ "`" <> linktext <> "`_" - else return $ "`" <> linktext <> " <" <> text src <> ">`__" -inlineToRST (Image attr alternate (source, tit)) = do - label <- registerImage attr alternate (source,tit) Nothing - return $ "|" <> label <> "|" -inlineToRST (Note contents) = do - -- add to notes in state - notes <- gets stNotes - modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 - return $ " [" <> text ref <> "]_" - -registerImage :: Attr -> [Inline] -> Target -> Maybe String -> State WriterState Doc -registerImage attr alt (src,tit) mbtarget = do - pics <- get >>= return . stImages - txt <- case lookup alt pics of - Just (a,s,t,mbt) | (a,s,t,mbt) == (attr,src,tit,mbtarget) - -> return alt - _ -> do - let alt' = if null alt || alt == [Str ""] - then [Str $ "image" ++ show (length pics)] - else alt - modify $ \st -> st { stImages = - (alt', (attr,src,tit, mbtarget)):stImages st } - return alt' - inlineListToRST txt - -imageDimsToRST :: Attr -> State WriterState Doc -imageDimsToRST attr = do - let (ident, _, _) = attr - name = if null ident - then empty - else ":name: " <> text ident - showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d) - in case (dimension dir attr) of - Just (Percent a) -> - case dir of - Height -> empty - Width -> cols (Percent a) - Just dim -> cols dim - Nothing -> empty - return $ cr <> name $$ showDim Width $$ showDim Height diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs deleted file mode 100644 index ef012e58e..000000000 --- a/src/Text/Pandoc/Writers/RTF.hs +++ /dev/null @@ -1,412 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to RTF (rich text format). --} -module Text.Pandoc.Writers.RTF ( writeRTF - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Writers.Math -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Walk -import Text.Pandoc.Logging -import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, chr, isDigit ) -import qualified Data.ByteString as B -import qualified Data.Map as M -import Text.Printf ( printf ) -import Text.Pandoc.ImageSize -import Control.Monad.Except (throwError, runExceptT, lift) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import qualified Text.Pandoc.Class as P - --- | Convert Image inlines into a raw RTF embedded image, read from a file, --- or a MediaBag, or the internet. --- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: PandocMonad m => WriterOptions -> Inline -> m Inline -rtfEmbedImage opts x@(Image attr _ (src,_)) = do - result <- runExceptT $ lift $ P.fetchItem (writerSourceURL opts) src - case result of - Right (imgdata, Just mime) - | mime == "image/jpeg" || mime == "image/png" -> do - let bytes = map (printf "%02x") $ B.unpack imgdata - filetype <- case mime of - "image/jpeg" -> return "\\jpegblip" - "image/png" -> return "\\pngblip" - _ -> throwError $ PandocSomeError "Unknown file type" - sizeSpec <- case imageSize imgdata of - Left msg -> do - report $ CouldNotDetermineImageSize src msg - return "" - Right sz -> return $ "\\picw" ++ show xpx ++ - "\\pich" ++ show ypx ++ - "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) - ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) - -- twip = 1/1440in = 1/20pt - where (xpx, ypx) = sizeInPixels sz - (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ - concat bytes ++ "}" - if B.null imgdata - then do - report $ CouldNotFetchResource src "image contained no data" - return x - else return $ RawInline (Format "rtf") raw - | otherwise -> do - report $ CouldNotFetchResource src "image is not a jpeg or png" - return x - Right (_, Nothing) -> do - report $ CouldNotDetermineMimeType src - return x - Left ( e :: PandocError ) -> do - report $ CouldNotFetchResource src (show e) - return x -rtfEmbedImage _ x = return x - --- | Convert Pandoc to a string in rich text format. -writeRTF :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeRTF options doc = do - -- handle images - Pandoc meta@(Meta metamap) blocks <- walkM (rtfEmbedImage options) doc - let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta - let toPlain (MetaBlocks [Para ils]) = MetaInlines ils - toPlain x = x - -- adjust title, author, date so we don't get para inside para - let meta' = Meta $ M.adjust toPlain "title" - . M.adjust toPlain "author" - . M.adjust toPlain "date" - $ metamap - metadata <- metaToJSON options - (fmap concat . mapM (blockToRTF 0 AlignDefault)) - (inlinesToRTF) - meta' - body <- blocksToRTF 0 AlignDefault blocks - let isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options - isTOCHeader _ = False - toc <- tableOfContents $ filter isTOCHeader blocks - let context = defField "body" body - $ defField "spacer" spacer - $ (if writerTableOfContents options - then defField "toc" toc - else id) - $ metadata - return $ case writerTemplate options of - Just tpl -> renderTemplate' tpl context - Nothing -> case reverse body of - ('\n':_) -> body - _ -> body ++ "\n" - --- | Construct table of contents from list of header blocks. -tableOfContents :: PandocMonad m => [Block] -> m String -tableOfContents headers = do - let contents = map elementToListItem $ hierarchicalize headers - blocksToRTF 0 AlignDefault $ - [Header 1 nullAttr [Str "Contents"], BulletList contents] - -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++ - if null subsecs - then [] - else [BulletList (map elementToListItem subsecs)] - --- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = - if ord c > 127 - then if surrogate c - then let x = ord c - 0x10000 - (q, r) = x `divMod` 0x400 - upper = q + 0xd800 - lower = r + 0xDC00 - in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs - else enc c ++ handleUnicode cs - else c:(handleUnicode cs) - where - surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) - || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = '\\':'u':(show (ord x)) ++ "?" - --- | Escape special characters. -escapeSpecial :: String -> String -escapeSpecial = escapeStringUsing $ - [ ('\t',"\\tab ") - , ('\8216',"\\u8216'") - , ('\8217',"\\u8217'") - , ('\8220',"\\u8220\"") - , ('\8221',"\\u8221\"") - , ('\8211',"\\u8211-") - , ('\8212',"\\u8212-") - ] ++ backslashEscapes "{\\}" - --- | Escape strings as needed for rich text format. -stringToRTF :: String -> String -stringToRTF = handleUnicode . escapeSpecial - --- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str) - --- | Make a paragraph with first-line indent, block indent, and space after. -rtfParSpaced :: Int -- ^ space after (in twips) - -> Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfParSpaced spaceAfter indent firstLineIndent alignment content = - let alignString = case alignment of - AlignLeft -> "\\ql " - AlignRight -> "\\qr " - AlignCenter -> "\\qc " - AlignDefault -> "\\ql " - in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" - --- | Default paragraph. -rtfPar :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfPar = rtfParSpaced 180 - --- | Compact paragraph (e.g. for compact list items). -rtfCompact :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfCompact = rtfParSpaced 0 - --- number of twips to indent -indentIncrement :: Int -indentIncrement = 720 - -listIncrement :: Int -listIncrement = 360 - --- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String -bulletMarker indent = case indent `mod` 720 of - 0 -> "\\bullet " - _ -> "\\endash " - --- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) = - if style == DefaultStyle && delim == DefaultDelim - then case indent `mod` 720 of - 0 -> orderedListMarkers (start, Decimal, Period) - _ -> orderedListMarkers (start, LowerAlpha, Period) - else orderedListMarkers (start, style, delim) - -blocksToRTF :: PandocMonad m - => Int - -> Alignment - -> [Block] - -> m String -blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) - --- | Convert Pandoc block element to RTF. -blockToRTF :: PandocMonad m - => Int -- ^ indent level - -> Alignment -- ^ alignment - -> Block -- ^ block to convert - -> m String -blockToRTF _ _ Null = return "" -blockToRTF indent alignment (Div _ bs) = - blocksToRTF indent alignment bs -blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment <$> inlinesToRTF lst -blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment <$> inlinesToRTF lst -blockToRTF indent alignment (LineBlock lns) = - blockToRTF indent alignment $ linesToPara lns -blockToRTF indent alignment (BlockQuote lst) = - blocksToRTF (indent + indentIncrement) alignment lst -blockToRTF indent _ (CodeBlock _ str) = - return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ b@(RawBlock f str) - | f == Format "rtf" = return str - | otherwise = do - report $ BlockNotRendered b - return "" -blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> - mapM (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = - (spaceAtEnd . concat) <$> - mapM (\(x,y) -> listItemToRTF alignment indent x y) - (zip (orderedMarkers indent attribs) lst) -blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> - mapM (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = return $ - rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level _ lst) = do - contents <- inlinesToRTF lst - return $ rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ contents -blockToRTF indent alignment (Table caption aligns sizes headers rows) = do - caption' <- inlinesToRTF caption - header' <- if all null headers - then return "" - else tableRowToRTF True indent aligns sizes headers - rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows - return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' - -tableRowToRTF :: PandocMonad m - => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String -tableRowToRTF header indent aligns sizes' cols = do - let totalTwips = 6 * 1440 -- 6 inches - let sizes = if all (== 0) sizes' - then take (length cols) $ repeat (1.0 / fromIntegral (length cols)) - else sizes' - columns <- concat <$> mapM (\(x,y) -> tableItemToRTF indent x y) - (zip aligns cols) - let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) - (0 :: Integer) sizes - let cellDefs = map (\edge -> (if header - then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) - rightEdges - let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ - "\\trkeep\\intbl\n{\n" - let end = "}\n\\intbl\\row}\n" - return $ start ++ columns ++ end - -tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String -tableItemToRTF indent alignment item = do - contents <- blocksToRTF indent alignment item - return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" - --- | Ensure that there's the same amount of space after compact --- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str = - if isSuffixOf "\\par}\n" str - then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" - else str - --- | Convert list item (list of blocks) to RTF. -listItemToRTF :: PandocMonad m - => Alignment -- ^ alignment - -> Int -- ^ indent level - -> String -- ^ list start marker - -> [Block] -- ^ list item (list of blocks) - -> m String -listItemToRTF alignment indent marker [] = return $ - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = do - (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list - let listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ - "\\tx" ++ show listIncrement ++ "\\tab" - let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker ('\\':'f':'i':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker (x:xs) = - x : insertListMarker xs - insertListMarker [] = [] - -- insert the list marker into the (processed) first block - return $ insertListMarker first ++ concat rest - --- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: PandocMonad m - => Alignment -- ^ alignment - -> Int -- ^ indent level - -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> m String -definitionListItemToRTF alignment indent (label, defs) = do - labelText <- blockToRTF indent alignment (Plain label) - itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) - return $ labelText ++ itemsText - --- | Convert list of inline items to RTF. -inlinesToRTF :: PandocMonad m - => [Inline] -- ^ list of inlines to convert - -> m String -inlinesToRTF lst = concat <$> mapM inlineToRTF lst - --- | Convert inline item to RTF. -inlineToRTF :: PandocMonad m - => Inline -- ^ inline to convert - -> m String -inlineToRTF (Span _ lst) = inlinesToRTF lst -inlineToRTF (Emph lst) = do - contents <- inlinesToRTF lst - return $ "{\\i " ++ contents ++ "}" -inlineToRTF (Strong lst) = do - contents <- inlinesToRTF lst - return $ "{\\b " ++ contents ++ "}" -inlineToRTF (Strikeout lst) = do - contents <- inlinesToRTF lst - return $ "{\\strike " ++ contents ++ "}" -inlineToRTF (Superscript lst) = do - contents <- inlinesToRTF lst - return $ "{\\super " ++ contents ++ "}" -inlineToRTF (Subscript lst) = do - contents <- inlinesToRTF lst - return $ "{\\sub " ++ contents ++ "}" -inlineToRTF (SmallCaps lst) = do - contents <- inlinesToRTF lst - return $ "{\\scaps " ++ contents ++ "}" -inlineToRTF (Quoted SingleQuote lst) = do - contents <- inlinesToRTF lst - return $ "\\u8216'" ++ contents ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = do - contents <- inlinesToRTF lst - return $ "\\u8220\"" ++ contents ++ "\\u8221\"" -inlineToRTF (Code _ str) = return $ "{\\f1 " ++ (codeStringToRTF str) ++ "}" -inlineToRTF (Str str) = return $ stringToRTF str -inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF -inlineToRTF (Cite _ lst) = inlinesToRTF lst -inlineToRTF il@(RawInline f str) - | f == Format "rtf" = return str - | otherwise = do - return $ InlineNotRendered il - return "" -inlineToRTF (LineBreak) = return "\\line " -inlineToRTF SoftBreak = return " " -inlineToRTF Space = return " " -inlineToRTF (Link _ text (src, _)) = do - contents <- inlinesToRTF text - return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" -inlineToRTF (Image _ _ (source, _)) = - return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = do - body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents - return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - body ++ "}" diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs deleted file mode 100644 index 89a826269..000000000 --- a/src/Text/Pandoc/Writers/Shared.hs +++ /dev/null @@ -1,183 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2013-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Shared utility functions for pandoc writers. --} -module Text.Pandoc.Writers.Shared ( - metaToJSON - , getField - , setField - , defField - , tagWithAttrs - , fixDisplayMath - , unsmartify - ) -where -import Text.Pandoc.Definition -import Text.Pandoc.Pretty -import Text.Pandoc.Options -import Text.Pandoc.XML (escapeStringForXML) -import Control.Monad (liftM) -import qualified Data.HashMap.Strict as H -import qualified Data.Map as M -import qualified Data.Text as T -import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..), encode) -import Text.Pandoc.UTF8 (toStringLazy) -import qualified Data.Traversable as Traversable -import Data.List ( groupBy ) -import Data.Maybe ( isJust ) - --- | Create JSON value for template from a 'Meta' and an association list --- of variables, specified at the command line or in the writer. --- Variables overwrite metadata fields with the same names. --- If multiple variables are set with the same name, a list is --- assigned. -metaToJSON :: Monad m - => WriterOptions - -> ([Block] -> m String) - -> ([Inline] -> m String) - -> Meta - -> m Value -metaToJSON opts blockWriter inlineWriter (Meta metamap) - | isJust (writerTemplate opts) = do - let baseContext = foldl (\acc (x,y) -> setField x y acc) (Object H.empty) - $ writerVariables opts - renderedMap <- Traversable.mapM - (metaValueToJSON blockWriter inlineWriter) - metamap - let metadata = M.foldWithKey defField baseContext renderedMap - return $ defField "meta-json" (toStringLazy $ encode metadata) metadata - | otherwise = return (Object H.empty) - -metaValueToJSON :: Monad m - => ([Block] -> m String) - -> ([Inline] -> m String) - -> MetaValue - -> m Value -metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ - Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap -metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $ - Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs -metaValueToJSON _ _ (MetaBool b) = return $ toJSON b -metaValueToJSON _ _ (MetaString s) = return $ toJSON s -metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs -metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs - --- | Retrieve a field value from a JSON object. -getField :: FromJSON a - => String - -> Value - -> Maybe a -getField field (Object hashmap) = do - result <- H.lookup (T.pack field) hashmap - case fromJSON result of - Success x -> return x - _ -> fail "Could not convert from JSON" -getField _ _ = fail "Not a JSON object" - -setField :: ToJSON a - => String - -> a - -> Value - -> Value --- | Set a field of a JSON object. If the field already has a value, --- convert it into a list with the new value appended to the old value(s). --- This is a utility function to be used in preparing template contexts. -setField field val (Object hashmap) = - Object $ H.insertWith combine (T.pack field) (toJSON val) hashmap - where combine newval oldval = - case fromJSON oldval of - Success xs -> toJSON $ xs ++ [newval] - _ -> toJSON [oldval, newval] -setField _ _ x = x - -defField :: ToJSON a - => String - -> a - -> Value - -> Value --- | Set a field of a JSON object if it currently has no value. --- If it has a value, do nothing. --- This is a utility function to be used in preparing template contexts. -defField field val (Object hashmap) = - Object $ H.insertWith f (T.pack field) (toJSON val) hashmap - where f _newval oldval = oldval -defField _ _ x = x - --- Produce an HTML tag with the given pandoc attributes. -tagWithAttrs :: String -> Attr -> Doc -tagWithAttrs tag (ident,classes,kvs) = hsep - ["<" <> text tag - ,if null ident - then empty - else "id=" <> doubleQuotes (text ident) - ,if null classes - then empty - else "class=" <> doubleQuotes (text (unwords classes)) - ,hsep (map (\(k,v) -> text k <> "=" <> - doubleQuotes (text (escapeStringForXML v))) kvs) - ] <> ">" - -isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False - -stripLeadingTrailingSpace :: [Inline] -> [Inline] -stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs - go (SoftBreak:xs) = xs - go xs = xs - --- Put display math in its own block (for ODT/DOCX). -fixDisplayMath :: Block -> Block -fixDisplayMath (Plain lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath (Para lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = x - -unsmartify :: WriterOptions -> String -> String -unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs -unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs -unsmartify opts ('\8211':xs) - | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs - | otherwise = "--" ++ unsmartify opts xs -unsmartify opts ('\8212':xs) - | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs - | otherwise = "---" ++ unsmartify opts xs -unsmartify opts (x:xs) = x : unsmartify opts xs -unsmartify _ [] = [] - diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs deleted file mode 100644 index a54d42c53..000000000 --- a/src/Text/Pandoc/Writers/TEI.hs +++ /dev/null @@ -1,324 +0,0 @@ -{-# LANGUAGE OverloadedStrings, PatternGuards #-} -{- -Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Docbook XML. --} -module Text.Pandoc.Writers.TEI (writeTEI) where -import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Options -import Text.Pandoc.Templates (renderTemplate') -import Data.List ( stripPrefix, isPrefixOf ) -import Data.Char ( toLower ) -import Text.Pandoc.Highlighting ( languages, languagesByExtension ) -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class ( PandocMonad ) - --- | Convert list of authors to a docbook <author> section -authorToTEI :: WriterOptions -> [Inline] -> B.Inlines -authorToTEI opts name' = - let name = render Nothing $ inlinesToTEI opts name' - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - in B.rawInline "tei" $ render colwidth $ - inTagsSimple "author" (text $ escapeStringForXML name) - --- | Convert Pandoc document to string in Docbook format. -writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeTEI opts (Pandoc meta blocks) = return $ - let elements = hierarchicalize blocks - colwidth = if writerWrapText opts == WrapAuto - then Just $ writerColumns opts - else Nothing - render' = render colwidth - startLvl = case writerTopLevelDivision opts of - TopLevelPart -> -1 - TopLevelChapter -> 0 - TopLevelSection -> 1 - TopLevelDefault -> 1 - auths' = map (authorToTEI opts) $ docAuthors meta - meta' = B.setMeta "author" auths' meta - Just metadata = metaToJSON opts - (Just . render colwidth . (vcat . - (map (elementToTEI opts startLvl)) . hierarchicalize)) - (Just . render colwidth . inlinesToTEI opts) - meta' - main = render' $ vcat (map (elementToTEI opts startLvl) elements) - context = defField "body" main - $ defField "mathml" (case writerHTMLMathMethod opts of - MathML -> True - _ -> False) - $ metadata - in case writerTemplate opts of - Nothing -> main - Just tpl -> renderTemplate' tpl context - --- | Convert an Element to TEI. -elementToTEI :: WriterOptions -> Int -> Element -> Doc -elementToTEI opts _ (Blk block) = blockToTEI opts block -elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) = - -- TEI doesn't allow sections with no content, so insert some if needed - let elements' = if null elements - then [Blk (Para [])] - else elements - -- level numbering correspond to LaTeX internals - divType = case lvl of - n | n == -1 -> "part" - | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "level" ++ show n - | otherwise -> "section" - in inTags True "div" [("type", divType) | not (null id')] $ --- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $ - inTagsSimple "head" (inlinesToTEI opts title) $$ - vcat (map (elementToTEI opts (lvl + 1)) elements') - --- | Convert a list of Pandoc blocks to TEI. -blocksToTEI :: WriterOptions -> [Block] -> Doc -blocksToTEI opts = vcat . map (blockToTEI opts) - --- | Auxiliary function to convert Plain block to Para. -plainToPara :: Block -> Block -plainToPara (Plain x) = Para x -plainToPara x = x - --- | Convert a list of pairs of terms and definitions into a TEI --- list with labels and items. -deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc -deflistItemsToTEI opts items = - vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items - --- | Convert a term and a list of blocks into a TEI varlistentry. -deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc -deflistItemToTEI opts term defs = - let def' = concatMap (map plainToPara) defs - in inTagsIndented "label" (inlinesToTEI opts term) $$ - inTagsIndented "item" (blocksToTEI opts def') - --- | Convert a list of lists of blocks to a list of TEI list items. -listItemsToTEI :: WriterOptions -> [[Block]] -> Doc -listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items - --- | Convert a list of blocks into a TEI list item. -listItemToTEI :: WriterOptions -> [Block] -> Doc -listItemToTEI opts item = - inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item - -imageToTEI :: WriterOptions -> Attr -> String -> Doc -imageToTEI _ attr src = selfClosingTag "graphic" $ - ("url", src) : idAndRole attr ++ dims - where - dims = go Width "width" ++ go Height "depth" - go dir dstr = case (dimension dir attr) of - Just a -> [(dstr, show a)] - Nothing -> [] - --- | Convert a Pandoc block element to TEI. -blockToTEI :: WriterOptions -> Block -> Doc -blockToTEI _ Null = empty --- Add ids to paragraphs in divs with ids - this is needed for --- pandoc-citeproc to get link anchors in bibliographies: -blockToTEI opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in - inTags False "p" attribs $ inlinesToTEI opts lst -blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs -blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize --- For TEI simple, text must be within containing block element, so --- we use plainToPara to ensure that Plain text ends up contained by --- something. -blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst --- title beginning with fig: indicates that the image is a figure ---blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = --- let alt = inlinesToTEI opts txt --- capt = if null txt --- then empty --- else inTagsSimple "title" alt --- in inTagsIndented "figure" $ --- capt $$ --- (inTagsIndented "mediaobject" $ --- (inTagsIndented "imageobject" --- (imageToTEI opts attr src)) $$ --- inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToTEI opts (Para lst) = - inTags False "p" [] $ inlinesToTEI opts lst -blockToTEI opts (LineBlock lns) = - blockToTEI opts $ linesToPara lns -blockToTEI opts (BlockQuote blocks) = - inTagsIndented "quote" $ blocksToTEI opts blocks -blockToTEI _ (CodeBlock (_,classes,_) str) = - text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</ab>") - where lang = if null langs - then "" - else escapeStringForXML (head langs) - isLang l = map toLower l `elem` map (map toLower) languages - langsFrom s = if isLang s - then [s] - else languagesByExtension . map toLower $ s - langs = concatMap langsFrom classes -blockToTEI opts (BulletList lst) = - let attribs = [("type", "unordered")] - in inTags True "list" attribs $ listItemsToTEI opts lst -blockToTEI _ (OrderedList _ []) = empty -blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of - DefaultStyle -> [] - Decimal -> [("type", "ordered:arabic")] - Example -> [("type", "ordered:arabic")] - UpperAlpha -> [("type", "ordered:upperalpha")] - LowerAlpha -> [("type", "ordered:loweralpha")] - UpperRoman -> [("type", "ordered:upperroman")] - LowerRoman -> [("type", "ordered:lowerroman")] - items = if start == 1 - then listItemsToTEI opts (first:rest) - else (inTags True "item" [("n",show start)] - (blocksToTEI opts $ map plainToPara first)) $$ - listItemsToTEI opts rest - in inTags True "list" attribs items -blockToTEI opts (DefinitionList lst) = - let attribs = [("type", "definition")] - in inTags True "list" attribs $ deflistItemsToTEI opts lst -blockToTEI _ (RawBlock f str) - | f == "tei" = text str -- raw TEI block (should such a thing exist). --- | f == "html" = text str -- allow html for backwards compatibility - | otherwise = empty -blockToTEI _ HorizontalRule = - selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")] - --- | TEI Tables --- TEI Simple's tables are composed of cells and rows; other --- table info in the AST is here lossily discard. -blockToTEI opts (Table _ _ _ headers rows) = - let - headers' = tableHeadersToTEI opts headers --- headers' = if all null headers --- then return empty --- else tableRowToTEI opts headers - in - inTags True "table" [] $ - vcat $ [headers'] <> map (tableRowToTEI opts) rows - -tableRowToTEI :: WriterOptions - -> [[Block]] - -> Doc -tableRowToTEI opts cols = - inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols - -tableHeadersToTEI :: WriterOptions - -> [[Block]] - -> Doc -tableHeadersToTEI opts cols = - inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols - -tableItemToTEI :: WriterOptions - -> [Block] - -> Doc -tableItemToTEI opts item = - inTags False "cell" [] $ vcat $ map (blockToTEI opts) item - --- | Convert a list of inline elements to TEI. -inlinesToTEI :: WriterOptions -> [Inline] -> Doc -inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst - --- | Convert an inline element to TEI. -inlineToTEI :: WriterOptions -> Inline -> Doc -inlineToTEI _ (Str str) = text $ escapeStringForXML str -inlineToTEI opts (Emph lst) = - inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst -inlineToTEI opts (Strong lst) = - inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst -inlineToTEI opts (Strikeout lst) = - inTags False "hi" [("rendition", "simple:strikethrough")] $ - inlinesToTEI opts lst -inlineToTEI opts (Superscript lst) = - inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst -inlineToTEI opts (Subscript lst) = - inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst -inlineToTEI opts (SmallCaps lst) = - inTags False "hi" [("rendition", "simple:smallcaps")] $ - inlinesToTEI opts lst -inlineToTEI opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToTEI opts lst -inlineToTEI opts (Cite _ lst) = - inlinesToTEI opts lst -inlineToTEI opts (Span _ ils) = - inlinesToTEI opts ils -inlineToTEI _ (Code _ str) = - inTags False "seg" [("type","code")] $ text (escapeStringForXML str) --- Distinguish display from inline math by wrapping the former in a "figure." -inlineToTEI _ (Math t str) = - case t of - InlineMath -> inTags False "formula" [("notation","TeX")] $ - text (str) - DisplayMath -> inTags True "figure" [("type","math")] $ - inTags False "formula" [("notation","TeX")] $ text (str) - -inlineToTEI _ (RawInline f x) | f == "tei" = text x - | otherwise = empty -inlineToTEI _ LineBreak = selfClosingTag "lb" [] -inlineToTEI _ Space = space --- because we use \n for LineBreak, we can't do soft breaks: -inlineToTEI _ SoftBreak = space -inlineToTEI opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = - let emailLink = text $ - escapeStringForXML $ email - in case txt of - [Str s] | escapeURI s == email -> emailLink - _ -> inlinesToTEI opts txt <+> - char '(' <> emailLink <> char ')' - | otherwise = - (if isPrefixOf "#" src - then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr - else inTags False "ref" $ ("target", src) : idAndRole attr ) $ - inlinesToTEI opts txt -inlineToTEI opts (Image attr description (src, tit)) = - let titleDoc = if null tit - then empty - else inTags False "figDesc" [] (text $ escapeStringForXML tit) - imageDesc = if null description - then empty - else inTags False "head" [] (inlinesToTEI opts description) - in inTagsIndented "figure" $ imageDesc $$ - imageToTEI opts attr src $$ titleDoc -inlineToTEI opts (Note contents) = - inTagsIndented "note" $ blocksToTEI opts contents - -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role - where - ident = if null id' - then [] - else [("id", id')] - role = if null cls - then [] - else [("role", unwords cls)] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs deleted file mode 100644 index fe6024351..000000000 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ /dev/null @@ -1,498 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2008-2015 John MacFarlane and Peter Wang - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2015 John MacFarlane and Peter Wang - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into Texinfo. --} -module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Templates (renderTemplate') -import Text.Printf ( printf ) -import Data.List ( transpose, maximumBy ) -import Data.Ord ( comparing ) -import Data.Char ( chr, ord ) -import Control.Monad.State -import Text.Pandoc.Pretty -import Text.Pandoc.ImageSize -import Network.URI ( isURI, unEscapeString ) -import System.FilePath -import qualified Data.Set as Set -import Control.Monad.Except (throwError) -import Text.Pandoc.Error -import Text.Pandoc.Class (PandocMonad, report) -import Text.Pandoc.Logging - -data WriterState = - WriterState { stStrikeout :: Bool -- document contains strikeout - , stSuperscript :: Bool -- document contains superscript - , stSubscript :: Bool -- document contains subscript - , stEscapeComma :: Bool -- in a context where we need @comma - , stIdentifiers :: Set.Set String -- header ids used already - , stOptions :: WriterOptions -- writer options - } - -{- TODO: - - internal cross references a la HTML - - generated .texi files don't work when run through texi2dvi - -} - -type TI m = StateT WriterState m - --- | Convert Pandoc to Texinfo. -writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeTexinfo options document = - evalStateT (pandocToTexinfo options $ wrapTop document) $ - WriterState { stStrikeout = False, stSuperscript = False, - stEscapeComma = False, stSubscript = False, - stIdentifiers = Set.empty, stOptions = options} - --- | Add a "Top" node around the document, needed by Texinfo. -wrapTop :: Pandoc -> Pandoc -wrapTop (Pandoc meta blocks) = - Pandoc meta (Header 0 nullAttr (docTitle meta) : blocks) - -pandocToTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> TI m String -pandocToTexinfo options (Pandoc meta blocks) = do - let titlePage = not $ all null - $ docTitle meta : docDate meta : docAuthors meta - let colwidth = if writerWrapText options == WrapAuto - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToTexinfo) - (fmap (render colwidth) . inlineListToTexinfo) - meta - main <- blockListToTexinfo blocks - st <- get - let body = render colwidth main - let context = defField "body" body - $ defField "toc" (writerTableOfContents options) - $ defField "titlepage" titlePage - $ defField "subscript" (stSubscript st) - $ defField "superscript" (stSuperscript st) - $ defField "strikeout" (stStrikeout st) - $ metadata - case writerTemplate options of - Nothing -> return body - Just tpl -> return $ renderTemplate' tpl context - --- | Escape things as needed for Texinfo. -stringToTexinfo :: String -> String -stringToTexinfo = escapeStringUsing texinfoEscapes - where texinfoEscapes = [ ('{', "@{") - , ('}', "@}") - , ('@', "@@") - , ('\160', "@ ") - , ('\x2014', "---") - , ('\x2013', "--") - , ('\x2026', "@dots{}") - , ('\x2019', "'") - ] - -escapeCommas :: PandocMonad m => TI m Doc -> TI m Doc -escapeCommas parser = do - oldEscapeComma <- gets stEscapeComma - modify $ \st -> st{ stEscapeComma = True } - res <- parser - modify $ \st -> st{ stEscapeComma = oldEscapeComma } - return res - --- | Puts contents into Texinfo command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '@' <> text cmd <> braces contents - --- | Convert Pandoc block element to Texinfo. -blockToTexinfo :: PandocMonad m - => Block -- ^ Block to convert - -> TI m Doc - -blockToTexinfo Null = return empty - -blockToTexinfo (Div _ bs) = blockListToTexinfo bs - -blockToTexinfo (Plain lst) = - inlineListToTexinfo lst - --- title beginning with fig: indicates that the image is a figure -blockToTexinfo (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> text "@caption" <> braces c) `fmap` - inlineListToTexinfo txt - img <- inlineToTexinfo (Image attr txt (src,tit)) - return $ text "@float" $$ img $$ capt $$ text "@end float" - -blockToTexinfo (Para lst) = - inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo - -blockToTexinfo (LineBlock lns) = - blockToTexinfo $ linesToPara lns - -blockToTexinfo (BlockQuote lst) = do - contents <- blockListToTexinfo lst - return $ text "@quotation" $$ - contents $$ - text "@end quotation" - -blockToTexinfo (CodeBlock _ str) = do - return $ blankline $$ - text "@verbatim" $$ - flush (text str) $$ - text "@end verbatim" <> blankline - -blockToTexinfo b@(RawBlock f str) - | f == "texinfo" = return $ text str - | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" - | otherwise = do - report $ BlockNotRendered b - return empty - -blockToTexinfo (BulletList lst) = do - items <- mapM listItemToTexinfo lst - return $ text "@itemize" $$ - vcat items $$ - text "@end itemize" <> blankline - -blockToTexinfo (OrderedList (start, numstyle, _) lst) = do - items <- mapM listItemToTexinfo lst - return $ text "@enumerate " <> exemplar $$ - vcat items $$ - text "@end enumerate" <> blankline - where - exemplar = case numstyle of - DefaultStyle -> decimal - Decimal -> decimal - Example -> decimal - UpperRoman -> decimal -- Roman numerals not supported - LowerRoman -> decimal - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - decimal = if start == 1 - then empty - else text (show start) - upperAlpha = text [chr $ ord 'A' + start - 1] - lowerAlpha = text [chr $ ord 'a' + start - 1] - -blockToTexinfo (DefinitionList lst) = do - items <- mapM defListItemToTexinfo lst - return $ text "@table @asis" $$ - vcat items $$ - text "@end table" <> blankline - -blockToTexinfo HorizontalRule = - -- XXX can't get the equivalent from LaTeX.hs to work - return $ text "@iftex" $$ - text "@bigskip@hrule@bigskip" $$ - text "@end iftex" $$ - text "@ifnottex" $$ - text (take 72 $ repeat '-') $$ - text "@end ifnottex" - -blockToTexinfo (Header 0 _ lst) = do - txt <- if null lst - then return $ text "Top" - else inlineListToTexinfo lst - return $ text "@node Top" $$ - text "@top " <> txt <> blankline - -blockToTexinfo (Header level _ lst) - | level < 1 || level > 4 = blockToTexinfo (Para lst) - | otherwise = do - node <- inlineListForNode lst - txt <- inlineListToTexinfo lst - idsUsed <- gets stIdentifiers - let id' = uniqueIdent lst idsUsed - modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } - sec <- seccmd level - return $ if (level > 0) && (level <= 4) - then blankline <> text "@node " <> node $$ - text sec <> txt $$ - text "@anchor" <> braces (text $ '#':id') - else txt - where - seccmd :: PandocMonad m => Int -> TI m String - seccmd 1 = return "@chapter " - seccmd 2 = return "@section " - seccmd 3 = return "@subsection " - seccmd 4 = return "@subsubsection " - seccmd _ = throwError $ PandocSomeError "illegal seccmd level" - -blockToTexinfo (Table caption aligns widths heads rows) = do - headers <- if all null heads - then return empty - else tableHeadToTexinfo aligns heads - captionText <- inlineListToTexinfo caption - rowsText <- mapM (tableRowToTexinfo aligns) rows - colDescriptors <- - if all (== 0) widths - then do -- use longest entry instead of column widths - cols <- mapM (mapM (liftM (render Nothing . hcat) . mapM blockToTexinfo)) $ - transpose $ heads : rows - return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols - else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths - let tableBody = text ("@multitable " ++ colDescriptors) $$ - headers $$ - vcat rowsText $$ - text "@end multitable" - return $ if isEmpty captionText - then tableBody <> blankline - else text "@float" $$ - tableBody $$ - inCmd "caption" captionText $$ - text "@end float" - -tableHeadToTexinfo :: PandocMonad m - => [Alignment] - -> [[Block]] - -> TI m Doc -tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " - -tableRowToTexinfo :: PandocMonad m - => [Alignment] - -> [[Block]] - -> TI m Doc -tableRowToTexinfo = tableAnyRowToTexinfo "@item " - -tableAnyRowToTexinfo :: PandocMonad m - => String - -> [Alignment] - -> [[Block]] - -> TI m Doc -tableAnyRowToTexinfo itemtype aligns cols = - zipWithM alignedBlock aligns cols >>= - return . (text itemtype $$) . foldl (\row item -> row $$ - (if isEmpty row then empty else text " @tab ") <> item) empty - -alignedBlock :: PandocMonad m - => Alignment - -> [Block] - -> TI m Doc --- XXX @flushleft and @flushright text won't get word wrapped. Since word --- wrapping is more important than alignment, we ignore the alignment. -alignedBlock _ = blockListToTexinfo -{- -alignedBlock AlignLeft col = do - b <- blockListToTexinfo col - return $ text "@flushleft" $$ b $$ text "@end flushleft" -alignedBlock AlignRight col = do - b <- blockListToTexinfo col - return $ text "@flushright" $$ b $$ text "@end flushright" -alignedBlock _ col = blockListToTexinfo col --} - --- | Convert Pandoc block elements to Texinfo. -blockListToTexinfo :: PandocMonad m - => [Block] - -> TI m Doc -blockListToTexinfo [] = return empty -blockListToTexinfo (x:xs) = do - x' <- blockToTexinfo x - case x of - Header level _ _ -> do - -- We need need to insert a menu for this node. - let (before, after) = break isHeaderBlock xs - before' <- blockListToTexinfo before - let menu = if level < 4 - then collectNodes (level + 1) after - else [] - lines' <- mapM makeMenuLine menu - let menu' = if null lines' - then empty - else text "@menu" $$ - vcat lines' $$ - text "@end menu" - after' <- blockListToTexinfo after - return $ x' $$ before' $$ menu' $$ after' - Para _ -> do - xs' <- blockListToTexinfo xs - case xs of - ((CodeBlock _ _):_) -> return $ x' $$ xs' - _ -> return $ x' $+$ xs' - _ -> do - xs' <- blockListToTexinfo xs - return $ x' $$ xs' - -collectNodes :: Int -> [Block] -> [Block] -collectNodes _ [] = [] -collectNodes level (x:xs) = - case x of - (Header hl _ _) -> - if hl < level - then [] - else if hl == level - then x : collectNodes level xs - else collectNodes level xs - _ -> - collectNodes level xs - -makeMenuLine :: PandocMonad m - => Block - -> TI m Doc -makeMenuLine (Header _ _ lst) = do - txt <- inlineListForNode lst - return $ text "* " <> txt <> text "::" -makeMenuLine _ = throwError $ PandocSomeError "makeMenuLine called with non-Header block" - -listItemToTexinfo :: PandocMonad m - => [Block] - -> TI m Doc -listItemToTexinfo lst = do - contents <- blockListToTexinfo lst - let spacer = case reverse lst of - (Para{}:_) -> blankline - _ -> empty - return $ text "@item" $$ contents <> spacer - -defListItemToTexinfo :: PandocMonad m - => ([Inline], [[Block]]) - -> TI m Doc -defListItemToTexinfo (term, defs) = do - term' <- inlineListToTexinfo term - let defToTexinfo bs = do d <- blockListToTexinfo bs - case reverse bs of - (Para{}:_) -> return $ d <> blankline - _ -> return d - defs' <- mapM defToTexinfo defs - return $ text "@item " <> term' $+$ vcat defs' - --- | Convert list of inline elements to Texinfo. -inlineListToTexinfo :: PandocMonad m - => [Inline] -- ^ Inlines to convert - -> TI m Doc -inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat - --- | Convert list of inline elements to Texinfo acceptable for a node name. -inlineListForNode :: PandocMonad m - => [Inline] -- ^ Inlines to convert - -> TI m Doc -inlineListForNode = return . text . stringToTexinfo . - filter (not . disallowedInNode) . stringify - --- periods, commas, colons, and parentheses are disallowed in node names -disallowedInNode :: Char -> Bool -disallowedInNode c = c `elem` (".,:()" :: String) - --- | Convert inline element to Texinfo -inlineToTexinfo :: PandocMonad m - => Inline -- ^ Inline to convert - -> TI m Doc - -inlineToTexinfo (Span _ lst) = - inlineListToTexinfo lst - -inlineToTexinfo (Emph lst) = - inlineListToTexinfo lst >>= return . inCmd "emph" - -inlineToTexinfo (Strong lst) = - inlineListToTexinfo lst >>= return . inCmd "strong" - -inlineToTexinfo (Strikeout lst) = do - modify $ \st -> st{ stStrikeout = True } - contents <- inlineListToTexinfo lst - return $ text "@textstrikeout{" <> contents <> text "}" - -inlineToTexinfo (Superscript lst) = do - modify $ \st -> st{ stSuperscript = True } - contents <- inlineListToTexinfo lst - return $ text "@textsuperscript{" <> contents <> char '}' - -inlineToTexinfo (Subscript lst) = do - modify $ \st -> st{ stSubscript = True } - contents <- inlineListToTexinfo lst - return $ text "@textsubscript{" <> contents <> char '}' - -inlineToTexinfo (SmallCaps lst) = - inlineListToTexinfo lst >>= return . inCmd "sc" - -inlineToTexinfo (Code _ str) = do - return $ text $ "@code{" ++ stringToTexinfo str ++ "}" - -inlineToTexinfo (Quoted SingleQuote lst) = do - contents <- inlineListToTexinfo lst - return $ char '`' <> contents <> char '\'' - -inlineToTexinfo (Quoted DoubleQuote lst) = do - contents <- inlineListToTexinfo lst - return $ text "``" <> contents <> text "''" - -inlineToTexinfo (Cite _ lst) = - inlineListToTexinfo lst -inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) -inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo il@(RawInline f str) - | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" - | f == "texinfo" = return $ text str - | otherwise = do - report $ InlineNotRendered il - return empty -inlineToTexinfo (LineBreak) = return $ text "@*" <> cr -inlineToTexinfo SoftBreak = do - wrapText <- gets (writerWrapText . stOptions) - case wrapText of - WrapAuto -> return space - WrapNone -> return space - WrapPreserve -> return cr -inlineToTexinfo Space = return space - -inlineToTexinfo (Link _ txt (src@('#':_), _)) = do - contents <- escapeCommas $ inlineListToTexinfo txt - return $ text "@ref" <> - braces (text (stringToTexinfo src) <> text "," <> contents) -inlineToTexinfo (Link _ txt (src, _)) = do - case txt of - [Str x] | escapeURI x == src -> -- autolink - do return $ text $ "@url{" ++ x ++ "}" - _ -> do contents <- escapeCommas $ inlineListToTexinfo txt - let src1 = stringToTexinfo src - return $ text ("@uref{" ++ src1 ++ ",") <> contents <> - char '}' - -inlineToTexinfo (Image attr alternate (source, _)) = do - content <- escapeCommas $ inlineListToTexinfo alternate - opts <- gets stOptions - let showDim dim = case (dimension dim attr) of - (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in" - (Just (Percent _)) -> "" - (Just d) -> show d - Nothing -> "" - return $ text ("@image{" ++ base ++ ',':(showDim Width) ++ ',':(showDim Height) ++ ",") - <> content <> text "," <> text (ext ++ "}") - where - ext = drop 1 $ takeExtension source' - base = dropExtension source' - source' = if isURI source - then source - else unEscapeString source - -inlineToTexinfo (Note contents) = do - contents' <- blockListToTexinfo contents - return $ text "@footnote" <> braces contents' diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs deleted file mode 100644 index 45f1780cf..000000000 --- a/src/Text/Pandoc/Writers/Textile.hs +++ /dev/null @@ -1,486 +0,0 @@ -{- -Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2015 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Textile markup. - -Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> --} -module Text.Pandoc.Writers.Textile ( writeTextile ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared -import Text.Pandoc.Pretty (render) -import Text.Pandoc.ImageSize -import Text.Pandoc.Writers.Shared -import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intercalate ) -import Control.Monad.State -import Data.Char ( isSpace ) -import Text.Pandoc.Class ( PandocMonad ) - -data WriterState = WriterState { - stNotes :: [String] -- Footnotes - , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" - , stStartNum :: Maybe Int -- Start number if first list item - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list - } - --- | Convert Pandoc to Textile. -writeTextile :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeTextile opts document = return $ - evalState (pandocToTextile opts document) - WriterState { stNotes = [], stListLevel = [], stStartNum = Nothing, - stUseTags = False } - --- | Return Textile representation of document. -pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String -pandocToTextile opts (Pandoc meta blocks) = do - metadata <- metaToJSON opts (blockListToTextile opts) - (inlineListToTextile opts) meta - body <- blockListToTextile opts blocks - notes <- liftM (unlines . reverse . stNotes) get - let main = body ++ if null notes then "" else ("\n\n" ++ notes) - let context = defField "body" main metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> return $ renderTemplate' tpl context - -withUseTags :: State WriterState a -> State WriterState a -withUseTags action = do - oldUseTags <- liftM stUseTags get - modify $ \s -> s { stUseTags = True } - result <- action - modify $ \s -> s { stUseTags = oldUseTags } - return result - --- | Escape one character as needed for Textile. -escapeCharForTextile :: Char -> String -escapeCharForTextile x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - '*' -> "*" - '_' -> "_" - '@' -> "@" - '+' -> "+" - '-' -> "-" - '|' -> "|" - '\x2014' -> " -- " - '\x2013' -> " - " - '\x2019' -> "'" - '\x2026' -> "..." - c -> [c] - --- | Escape string as needed for Textile. -escapeStringForTextile :: String -> String -escapeStringForTextile = concatMap escapeCharForTextile - --- | Convert Pandoc block element to Textile. -blockToTextile :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState String - -blockToTextile _ Null = return "" - -blockToTextile opts (Div attr bs) = do - let startTag = render Nothing $ tagWithAttrs "div" attr - let endTag = "</div>" - contents <- blockListToTextile opts bs - return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" - -blockToTextile opts (Plain inlines) = - inlineListToTextile opts inlines - --- title beginning with fig: indicates that the image is a figure -blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- blockToTextile opts (Para txt) - im <- inlineToTextile opts (Image attr txt (src,tit)) - return $ im ++ "\n" ++ capt - -blockToTextile opts (Para inlines) = do - useTags <- liftM stUseTags get - listLevel <- liftM stListLevel get - contents <- inlineListToTextile opts inlines - return $ if useTags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null listLevel then "\n" else "" - -blockToTextile opts (LineBlock lns) = - blockToTextile opts $ linesToPara lns - -blockToTextile _ (RawBlock f str) - | f == Format "html" || f == Format "textile" = return str - | otherwise = return "" - -blockToTextile _ HorizontalRule = return "<hr />\n" - -blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do - contents <- inlineListToTextile opts inlines - let identAttr = if null ident then "" else ('#':ident) - let attribs = if null identAttr && null classes - then "" - else "(" ++ unwords classes ++ identAttr ++ ")" - let lang = maybe "" (\x -> "[" ++ x ++ "]") $ lookup "lang" keyvals - let styles = maybe "" (\x -> "{" ++ x ++ "}") $ lookup "style" keyvals - let prefix = 'h' : show level ++ attribs ++ styles ++ lang ++ ". " - return $ prefix ++ contents ++ "\n" - -blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) = - return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++ - "\n</pre>\n" - where classes' = if null classes - then "" - else " class=\"" ++ unwords classes ++ "\"" - -blockToTextile _ (CodeBlock (_,classes,_) str) = - return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n" - where classes' = if null classes - then "" - else "(" ++ unwords classes ++ ")" - -blockToTextile opts (BlockQuote bs@[Para _]) = do - contents <- blockListToTextile opts bs - return $ "bq. " ++ contents ++ "\n\n" - -blockToTextile opts (BlockQuote blocks) = do - contents <- blockListToTextile opts blocks - return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n" - -blockToTextile opts (Table [] aligns widths headers rows') | - all (==0) widths = do - hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers - let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|" - let header = if all null headers then "" else cellsToRow hs ++ "\n" - let blocksToCell (align, bs) = do - contents <- stripTrailingNewlines <$> blockListToTextile opts bs - let alignMarker = case align of - AlignLeft -> "<. " - AlignRight -> ">. " - AlignCenter -> "=. " - AlignDefault -> "" - return $ alignMarker ++ contents - let rowToCells = mapM blocksToCell . zip aligns - bs <- mapM rowToCells rows' - let body = unlines $ map cellsToRow bs - return $ header ++ body - -blockToTextile opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return "" - else do - c <- inlineListToTextile opts capt - return $ "<caption>" ++ c ++ "</caption>\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let coltags = if all (== 0.0) widths - then "" - else unlines $ map - (\w -> "<col width=\"" ++ percent w ++ "\" />") widths - head' <- if all null headers - then return "" - else do - hs <- tableRowToTextile opts alignStrings 0 headers - return $ "<thead>\n" ++ hs ++ "\n</thead>\n" - body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows' - return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ - "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" - -blockToTextile opts x@(BulletList items) = do - oldUseTags <- liftM stUseTags get - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" - else do - modify $ \s -> s { stListLevel = stListLevel s ++ "*" } - level <- get >>= return . length . stListLevel - contents <- mapM (listItemToTextile opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ (if level > 1 then "" else "\n") - -blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do - oldUseTags <- liftM stUseTags get - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ - "\n</ol>\n" - else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" - , stStartNum = if start > 1 - then Just start - else Nothing } - level <- get >>= return . length . stListLevel - contents <- mapM (listItemToTextile opts) items - modify $ \s -> s { stListLevel = init (stListLevel s), - stStartNum = Nothing } - return $ vcat contents ++ (if level > 1 then "" else "\n") - -blockToTextile opts (DefinitionList items) = do - contents <- withUseTags $ mapM (definitionListItemToTextile opts) items - return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n" - --- Auxiliary functions for lists: - --- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle - in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ - (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" - else "") - --- | Convert bullet or ordered list item (list of blocks) to Textile. -listItemToTextile :: WriterOptions -> [Block] -> State WriterState String -listItemToTextile opts items = do - contents <- blockListToTextile opts items - useTags <- get >>= return . stUseTags - if useTags - then return $ "<li>" ++ contents ++ "</li>" - else do - marker <- gets stListLevel - mbstart <- gets stStartNum - case mbstart of - Just n -> do - modify $ \s -> s{ stStartNum = Nothing } - return $ marker ++ show n ++ " " ++ contents - Nothing -> return $ marker ++ " " ++ contents - --- | Convert definition list item (label, list of blocks) to Textile. -definitionListItemToTextile :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState String -definitionListItemToTextile opts (label, items) = do - labelText <- inlineListToTextile opts label - contents <- mapM (blockListToTextile opts) items - return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents) - --- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. -isSimpleList :: Block -> Bool -isSimpleList x = - case x of - BulletList items -> all isSimpleListItem items - OrderedList (_, sty, _) items -> all isSimpleListItem items && - sty `elem` [DefaultStyle, Decimal] - _ -> False - --- | True if list item can be handled with the simple wiki syntax. False if --- HTML tags will be needed. -isSimpleListItem :: [Block] -> Bool -isSimpleListItem [] = True -isSimpleListItem [x] = - case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - _ -> False -isSimpleListItem [x, y] | isPlainOrPara x = - case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - _ -> False -isSimpleListItem _ = False - -isPlainOrPara :: Block -> Bool -isPlainOrPara (Plain _) = True -isPlainOrPara (Para _) = True -isPlainOrPara _ = False - --- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" - --- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, --- and Textile writers, and should be abstracted out.) - -tableRowToTextile :: WriterOptions - -> [String] - -> Int - -> [[Block]] - -> State WriterState String -tableRowToTextile opts alignStrings rownum cols' = do - let celltype = if rownum == 0 then "th" else "td" - let rowclass = case rownum of - 0 -> "header" - x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToTextile opts celltype alignment item) - alignStrings cols' - return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableItemToTextile :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String -tableItemToTextile opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "</" ++ celltype ++ ">" - contents <- blockListToTextile opts item - return $ mkcell contents - --- | Convert list of Pandoc block elements to Textile. -blockListToTextile :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState String -blockListToTextile opts blocks = - mapM (blockToTextile opts) blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to Textile. -inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String -inlineListToTextile opts lst = - mapM (inlineToTextile opts) lst >>= return . concat - --- | Convert Pandoc inline element to Textile. -inlineToTextile :: WriterOptions -> Inline -> State WriterState String - -inlineToTextile opts (Span _ lst) = - inlineListToTextile opts lst - -inlineToTextile opts (Emph lst) = do - contents <- inlineListToTextile opts lst - return $ if '_' `elem` contents - then "<em>" ++ contents ++ "</em>" - else "_" ++ contents ++ "_" - -inlineToTextile opts (Strong lst) = do - contents <- inlineListToTextile opts lst - return $ if '*' `elem` contents - then "<strong>" ++ contents ++ "</strong>" - else "*" ++ contents ++ "*" - -inlineToTextile opts (Strikeout lst) = do - contents <- inlineListToTextile opts lst - return $ if '-' `elem` contents - then "<del>" ++ contents ++ "</del>" - else "-" ++ contents ++ "-" - -inlineToTextile opts (Superscript lst) = do - contents <- inlineListToTextile opts lst - return $ if '^' `elem` contents - then "<sup>" ++ contents ++ "</sup>" - else "[^" ++ contents ++ "^]" - -inlineToTextile opts (Subscript lst) = do - contents <- inlineListToTextile opts lst - return $ if '~' `elem` contents - then "<sub>" ++ contents ++ "</sub>" - else "[~" ++ contents ++ "~]" - -inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst - -inlineToTextile opts (Quoted SingleQuote lst) = do - contents <- inlineListToTextile opts lst - return $ "'" ++ contents ++ "'" - -inlineToTextile opts (Quoted DoubleQuote lst) = do - contents <- inlineListToTextile opts lst - return $ "\"" ++ contents ++ "\"" - -inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst - -inlineToTextile _ (Code _ str) = - return $ if '@' `elem` str - then "<tt>" ++ escapeStringForXML str ++ "</tt>" - else "@" ++ str ++ "@" - -inlineToTextile _ (Str str) = return $ escapeStringForTextile str - -inlineToTextile _ (Math _ str) = - return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>" - -inlineToTextile opts (RawInline f str) - | f == Format "html" || f == Format "textile" = return str - | (f == Format "latex" || f == Format "tex") && - isEnabled Ext_raw_tex opts = return str - | otherwise = return "" - -inlineToTextile _ LineBreak = return "\n" - -inlineToTextile _ SoftBreak = return " " - -inlineToTextile _ Space = return " " - -inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do - let classes = if null cls - then "" - else "(" ++ unwords cls ++ ")" - label <- case txt of - [Code _ s] - | s == src -> return "$" - [Str s] - | s == src -> return "$" - _ -> inlineListToTextile opts txt - return $ "\"" ++ classes ++ label ++ "\":" ++ src - -inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do - alt' <- inlineListToTextile opts alt - let txt = if null tit - then if null alt' - then "" - else "(" ++ alt' ++ ")" - else "(" ++ tit ++ ")" - classes = if null cls - then "" - else "(" ++ unwords cls ++ ")" - showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";" - in case (dimension dir attr) of - Just (Percent a) -> toCss $ show (Percent a) - Just dim -> toCss $ showInPixel opts dim ++ "px" - Nothing -> Nothing - styles = case (showDim Width, showDim Height) of - (Just w, Just h) -> "{" ++ w ++ h ++ "}" - (Just w, Nothing) -> "{" ++ w ++ "height:auto;}" - (Nothing, Just h) -> "{" ++ "width:auto;" ++ h ++ "}" - (Nothing, Nothing) -> "" - return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!" - -inlineToTextile opts (Note contents) = do - curNotes <- liftM stNotes get - let newnum = length curNotes + 1 - contents' <- blockListToTextile opts contents - let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" - modify $ \s -> s { stNotes = thisnote : curNotes } - return $ "[" ++ show newnum ++ "]" - -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs deleted file mode 100644 index d01ce0e8b..000000000 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ /dev/null @@ -1,396 +0,0 @@ -{- -Copyright (C) 2008-2015 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.ZimWiki - Copyright : Copyright (C) 2008-2015 John MacFarlane, 2017 Alex Ivkin - License : GNU GPL, version 2 or above - - Maintainer : Alex Ivkin <alex@ivkin.net> - Stability : beta - Portability : portable - -Conversion of 'Pandoc' documents to ZimWiki markup. - -http://zim-wiki.org/manual/Help/Wiki_Syntax.html --} - -module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where -import Text.Pandoc.Definition -import Text.Pandoc.Options ( WriterOptions(writerTableOfContents, writerTemplate, writerWrapText), WrapOption(..) ) -import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting, trimr - , substitute ) -import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) -import Text.Pandoc.ImageSize -import Text.Pandoc.Templates ( renderTemplate' ) -import Data.List ( intercalate, isPrefixOf, transpose, isInfixOf ) -import Data.Text ( breakOnAll, pack ) -import Data.Default (Default(..)) -import Network.URI ( isURI ) -import Control.Monad ( zipWithM ) -import Control.Monad.State ( modify, State, get, evalState ) -import Text.Pandoc.Class ( PandocMonad ) -import qualified Data.Map as Map - -data WriterState = WriterState { - stItemNum :: Int, - stIndent :: String, -- Indent after the marker at the beginning of list items - stInTable :: Bool, -- Inside a table - stInLink :: Bool -- Inside a link description - } - -instance Default WriterState where - def = WriterState { stItemNum = 1, stIndent = "", stInTable = False, stInLink = False } - --- | Convert Pandoc to ZimWiki. -writeZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String -writeZimWiki opts document = return $ evalState (pandocToZimWiki opts document) def - --- | Return ZimWiki representation of document. -pandocToZimWiki :: WriterOptions -> Pandoc -> State WriterState String -pandocToZimWiki opts (Pandoc meta blocks) = do - metadata <- metaToJSON opts - (fmap trimr . blockListToZimWiki opts) - (inlineListToZimWiki opts) - meta - body <- blockListToZimWiki opts blocks - --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" - let main = body - let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata - case writerTemplate opts of - Just tpl -> return $ renderTemplate' tpl context - Nothing -> return main - --- | Escape special characters for ZimWiki. -escapeString :: String -> String -escapeString = substitute "__" "''__''" . - substitute "**" "''**''" . - substitute "~~" "''~~''" . - substitute "//" "''//''" - --- | Convert Pandoc block element to ZimWiki. -blockToZimWiki :: WriterOptions -> Block -> State WriterState String - -blockToZimWiki _ Null = return "" - -blockToZimWiki opts (Div _attrs bs) = do - contents <- blockListToZimWiki opts bs - return $ contents ++ "\n" - -blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines - --- title beginning with fig: indicates that the image is a figure --- ZimWiki doesn't support captions - so combine together alt and caption into alt -blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return "" - else (" " ++) `fmap` inlineListToZimWiki opts txt - let opt = if null txt - then "" - else "|" ++ if null tit then capt else tit ++ capt - -- Relative links fail isURI and receive a colon - prefix = if isURI src then "" else ":" - return $ "{{" ++ prefix ++ src ++ imageDims opts attr ++ opt ++ "}}\n" - -blockToZimWiki opts (Para inlines) = do - indent <- stIndent <$> get - -- useTags <- stUseTags <$> get - contents <- inlineListToZimWiki opts inlines - return $ contents ++ if null indent then "\n" else "" - -blockToZimWiki opts (LineBlock lns) = do - blockToZimWiki opts $ linesToPara lns - -blockToZimWiki opts (RawBlock f str) - | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" - -blockToZimWiki _ HorizontalRule = return "\n----\n" - -blockToZimWiki opts (Header level _ inlines) = do - contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" - -blockToZimWiki _ (CodeBlock (_,classes,_) str) = do - -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using - let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")] - let langmap = Map.fromList langal - return $ case classes of - [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block - (x:_) -> "{{{code: lang=\"" ++ - (case Map.lookup x langmap of - Nothing -> x - Just y -> y) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec - -blockToZimWiki opts (BlockQuote blocks) = do - contents <- blockListToZimWiki opts blocks - return $ unlines $ map ("> " ++) $ lines contents - -blockToZimWiki opts (Table capt aligns _ headers rows) = do - captionDoc <- if null capt - then return "" - else do - c <- inlineListToZimWiki opts capt - return $ "" ++ c ++ "\n" - headers' <- if all null headers - then zipWithM (tableItemToZimWiki opts) aligns (rows !! 0) - else mapM (inlineListToZimWiki opts) (map removeFormatting headers) -- emphasis, links etc. are not allowed in table headers - rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') - let padTo (width, al) s = - case (width - length s) of - x | x > 0 -> - if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' - else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' - | otherwise -> s - let borderCell (width, al) _ = - if al == AlignLeft - then ":"++ replicate (width-1) '-' - else if al == AlignDefault - then replicate width '-' - else if al == AlignRight - then replicate (width-1) '-' ++ ":" - else ":" ++ replicate (width-2) '-' ++ ":" - let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|" - let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|" - return $ captionDoc ++ - (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++ - unlines (map renderRow rows') - -blockToZimWiki opts (BulletList items) = do - indent <- stIndent <$> get - modify $ \s -> s { stIndent = stIndent s ++ "\t" } - contents <- (mapM (listItemToZimWiki opts) items) - modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } - return $ vcat contents ++ if null indent then "\n" else "" - -blockToZimWiki opts (OrderedList _ items) = do - indent <- stIndent <$> get - modify $ \s -> s { stIndent = stIndent s ++ "\t", stItemNum = 1 } - contents <- (mapM (orderedListItemToZimWiki opts) items) - modify $ \s -> s{ stIndent = indent } -- drop 1 (stIndent s) } - return $ vcat contents ++ if null indent then "\n" else "" - -blockToZimWiki opts (DefinitionList items) = do - contents <- (mapM (definitionListItemToZimWiki opts) items) - return $ vcat contents - -definitionListItemToZimWiki :: WriterOptions -> ([Inline],[[Block]]) -> State WriterState String -definitionListItemToZimWiki opts (label, items) = do - labelText <- inlineListToZimWiki opts label - contents <- mapM (blockListToZimWiki opts) items - indent <- stIndent <$> get - return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents - --- Auxiliary functions for lists: -indentFromHTML :: WriterOptions -> String -> State WriterState String -indentFromHTML _ str = do - indent <- stIndent <$> get - itemnum <- stItemNum <$> get - if isInfixOf "<li>" str then return $ indent ++ show itemnum ++ "." - else if isInfixOf "</li>" str then return "\n" - else if isInfixOf "<li value=" str then do - -- poor man's cut - let val = drop 10 $ reverse $ drop 1 $ reverse str - --let val = take ((length valls) - 2) valls - modify $ \s -> s { stItemNum = read val } - return "" - else if isInfixOf "<ol>" str then do - let olcount=countSubStrs "<ol>" str - modify $ \s -> s { stIndent = stIndent s ++ replicate olcount '\t', stItemNum = 1 } - return "" - else if isInfixOf "</ol>" str then do - let olcount=countSubStrs "/<ol>" str - modify $ \s -> s{ stIndent = drop olcount (stIndent s) } - return "" - else - return "" - -countSubStrs :: String -> String -> Int -countSubStrs sub str = length $ breakOnAll (pack sub) (pack str) - -cleanupCode :: String -> String -cleanupCode = substitute "<nowiki>" "" . substitute "</nowiki>" "" - -vcat :: [String] -> String -vcat = intercalate "\n" - --- | Convert bullet list item (list of blocks) to ZimWiki. -listItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String -listItemToZimWiki opts items = do - contents <- blockListToZimWiki opts items - indent <- stIndent <$> get - return $ indent ++ "* " ++ contents - --- | Convert ordered list item (list of blocks) to ZimWiki. -orderedListItemToZimWiki :: WriterOptions -> [Block] -> State WriterState String -orderedListItemToZimWiki opts items = do - contents <- blockListToZimWiki opts items - indent <- stIndent <$> get - itemnum <- stItemNum <$> get - --modify $ \s -> s { stItemNum = itemnum + 1 } -- this is not strictly necessary for zim as zim does its own renumbering - return $ indent ++ show itemnum ++ ". " ++ contents - --- Auxiliary functions for tables: -tableItemToZimWiki :: WriterOptions -> Alignment -> [Block] -> State WriterState String -tableItemToZimWiki opts align' item = do - let mkcell x = (if align' == AlignRight || align' == AlignCenter - then " " - else "") ++ x ++ - (if align' == AlignLeft || align' == AlignCenter - then " " - else "") - modify $ \s -> s { stInTable = True } - contents <- blockListToZimWiki opts item - modify $ \s -> s { stInTable = False } - return $ mkcell contents - --- | Convert list of Pandoc block elements to ZimWiki. -blockListToZimWiki :: WriterOptions -> [Block] -> State WriterState String -blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks - --- | Convert list of Pandoc inline elements to ZimWiki. -inlineListToZimWiki :: WriterOptions -> [Inline] -> State WriterState String -inlineListToZimWiki opts lst = concat <$> (mapM (inlineToZimWiki opts) lst) - --- | Convert Pandoc inline element to ZimWiki. -inlineToZimWiki :: WriterOptions -> Inline -> State WriterState String - -inlineToZimWiki opts (Emph lst) = do - contents <- inlineListToZimWiki opts lst - return $ "//" ++ contents ++ "//" - -inlineToZimWiki opts (Strong lst) = do - contents <- inlineListToZimWiki opts lst - return $ "**" ++ contents ++ "**" - -inlineToZimWiki opts (Strikeout lst) = do - contents <- inlineListToZimWiki opts lst - return $ "~~" ++ contents ++ "~~" - -inlineToZimWiki opts (Superscript lst) = do - contents <- inlineListToZimWiki opts lst - return $ "^{" ++ contents ++ "}" - -inlineToZimWiki opts (Subscript lst) = do - contents <- inlineListToZimWiki opts lst - return $ "_{" ++ contents ++ "}" - -inlineToZimWiki opts (Quoted SingleQuote lst) = do - contents <- inlineListToZimWiki opts lst - return $ "\8216" ++ contents ++ "\8217" - -inlineToZimWiki opts (Quoted DoubleQuote lst) = do - contents <- inlineListToZimWiki opts lst - return $ "\8220" ++ contents ++ "\8221" - -inlineToZimWiki opts (Span _attrs ils) = inlineListToZimWiki opts ils - -inlineToZimWiki opts (SmallCaps lst) = inlineListToZimWiki opts lst - -inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst - -inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''" - -inlineToZimWiki _ (Str str) = do - inTable <- stInTable <$> get - inLink <- stInLink <$> get - if inTable - then return $ substitute "|" "\\|" . escapeString $ str - else - if inLink - then return $ str - else return $ escapeString str - -inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped - where delim = case mathType of - DisplayMath -> "$$" - InlineMath -> "$" - --- | f == Format "html" = return $ "<html>" ++ str ++ "</html>" -inlineToZimWiki opts (RawInline f str) - | f == Format "zimwiki" = return str - | f == Format "html" = do cont <- indentFromHTML opts str; return cont - | otherwise = return "" - -inlineToZimWiki _ LineBreak = do - inTable <- stInTable <$> get - if inTable - then return "\\n" - else return "\n" - -inlineToZimWiki opts SoftBreak = - case writerWrapText opts of - WrapNone -> return " " - WrapAuto -> return " " - WrapPreserve -> return "\n" - -inlineToZimWiki _ Space = return " " - -inlineToZimWiki opts (Link _ txt (src, _)) = do - inTable <- stInTable <$> get - modify $ \s -> s { stInLink = True } - label <- inlineListToZimWiki opts $ removeFormatting txt -- zim does not allow formatting in link text, it takes the text verbatim, no need to escape it - modify $ \s -> s { stInLink = False } - let label'= if inTable - then "" -- no label is allowed in a table - else "|"++label - case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" - | escapeURI s == src -> return src - _ -> if isURI src - then return $ "[[" ++ src ++ label' ++ "]]" - else return $ "[[" ++ src' ++ label' ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page -inlineToZimWiki opts (Image attr alt (source, tit)) = do - alt' <- inlineListToZimWiki opts alt - inTable <- stInTable <$> get - let txt = case (tit, alt, inTable) of - ("",[], _) -> "" - ("", _, False ) -> "|" ++ alt' - (_ , _, False ) -> "|" ++ tit - (_ , _, True ) -> "" - -- Relative links fail isURI and receive a colon - prefix = if isURI source then "" else ":" - return $ "{{" ++ prefix ++ source ++ imageDims opts attr ++ txt ++ "}}" - -inlineToZimWiki opts (Note contents) = do - -- no concept of notes in zim wiki, use a text block - contents' <- blockListToZimWiki opts contents - return $ " **{Note:** " ++ trimr contents' ++ "**}**" - -imageDims :: WriterOptions -> Attr -> String -imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) - where - toPx = fmap (showInPixel opts) . checkPct - checkPct (Just (Percent _)) = Nothing - checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h - go Nothing Nothing = "" diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs deleted file mode 100644 index e105aee91..000000000 --- a/src/Text/Pandoc/XML.hs +++ /dev/null @@ -1,115 +0,0 @@ -{- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2016 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for escaping and formatting XML. --} -module Text.Pandoc.XML ( escapeCharForXML, - escapeStringForXML, - inTags, - selfClosingTag, - inTagsSimple, - inTagsIndented, - toEntities, - fromEntities ) where - -import Text.Pandoc.Pretty -import Data.Char (ord, isAscii, isSpace) -import Text.HTML.TagSoup.Entity (lookupEntity) - --- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String -escapeCharForXML x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - c -> [c] - --- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML = concatMap escapeCharForXML - --- | Escape newline characters as -escapeNls :: String -> String -escapeNls (x:xs) - | x == '\n' = " " ++ escapeNls xs - | otherwise = x : escapeNls xs -escapeNls [] = [] - --- | Return a text object with a string of formatted XML attributes. -attributeList :: [(String, String)] -> Doc -attributeList = hcat . map - (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++ - escapeNls (escapeStringForXML b) ++ "\"")) - --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes and (if specified) indentation. -inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc -inTags isIndented tagType attribs contents = - let openTag = char '<' <> text tagType <> attributeList attribs <> - char '>' - closeTag = text "</" <> text tagType <> char '>' - in if isIndented - then openTag $$ nest 2 contents $$ closeTag - else openTag <> contents <> closeTag - --- | Return a self-closing tag of tagType with specified attributes -selfClosingTag :: String -> [(String, String)] -> Doc -selfClosingTag tagType attribs = - char '<' <> text tagType <> attributeList attribs <> text " />" - --- | Put the supplied contents between start and end tags of tagType. -inTagsSimple :: String -> Doc -> Doc -inTagsSimple tagType = inTags False tagType [] - --- | Put the supplied contents in indented block btw start and end tags. -inTagsIndented :: String -> Doc -> Doc -inTagsIndented tagType = inTags True tagType [] - --- | Escape all non-ascii characters using numerical entities. -toEntities :: String -> String -toEntities [] = "" -toEntities (c:cs) - | isAscii c = c : toEntities cs - | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs - --- Unescapes XML entities -fromEntities :: String -> String -fromEntities ('&':xs) = - case lookupEntity ent' of - Just c -> c ++ fromEntities rest - Nothing -> '&' : fromEntities xs - where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of - (zs,';':ys) -> (zs,ys) - (zs, ys) -> (zs,ys) - ent' = case ent of - '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" - -fromEntities (x:xs) = x : fromEntities xs -fromEntities [] = [] |