aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs82
-rw-r--r--src/Text/Pandoc/BCP47.hs139
-rw-r--r--src/Text/Pandoc/Class.hs34
-rw-r--r--src/Text/Pandoc/Compat/Time.hs2
-rw-r--r--src/Text/Pandoc/Error.hs8
-rw-r--r--src/Text/Pandoc/Extensions.hs12
-rw-r--r--src/Text/Pandoc/Logging.hs32
-rw-r--r--src/Text/Pandoc/Lua.hs233
-rw-r--r--src/Text/Pandoc/Lua/PandocModule.hs15
-rw-r--r--src/Text/Pandoc/Options.hs4
-rw-r--r--src/Text/Pandoc/PDF.hs47
-rw-r--r--src/Text/Pandoc/Parsing.hs103
-rw-r--r--src/Text/Pandoc/Pretty.hs2
-rw-r--r--src/Text/Pandoc/Readers.hs18
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs7
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs2
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs2
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs20
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs4
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs2774
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs48
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs108
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs10
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs607
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs6
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs5
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs5
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs10
-rw-r--r--src/Text/Pandoc/Readers/RST.hs53
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs11
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs12
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs673
-rw-r--r--src/Text/Pandoc/Shared.hs14
-rw-r--r--src/Text/Pandoc/Templates.hs24
-rw-r--r--src/Text/Pandoc/Writers.hs11
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs4
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs8
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs86
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs7
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs12
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs24
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs4
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs78
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs13
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs60
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs4
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs8
-rw-r--r--src/Text/Pandoc/Writers/JATS.hs12
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs246
-rw-r--r--src/Text/Pandoc/Writers/Man.hs4
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs31
-rw-r--r--src/Text/Pandoc/Writers/Math.hs7
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs7
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs7
-rw-r--r--src/Text/Pandoc/Writers/Muse.hs49
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs48
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs6
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs42
-rw-r--r--src/Text/Pandoc/Writers/Org.hs4
-rw-r--r--src/Text/Pandoc/Writers/RST.hs4
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs13
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs12
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs4
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs4
-rw-r--r--src/Text/Pandoc/Writers/ZimWiki.hs4
69 files changed, 4164 insertions, 1801 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 19066e8b7..68bdc1432 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -76,14 +76,16 @@ import System.IO.Error (isDoesNotExistError)
import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog,
- setResourcePath, withMediaBag)
+ setResourcePath, withMediaBag, setTrace)
import Text.Pandoc.Highlighting (highlightingStyles)
-import Text.Pandoc.Lua (runLuaFilter)
+import Text.Pandoc.Lua (runLuaFilter, LuaException(..))
+import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Process (pipeProcess)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile,
- readDataFileUTF8, safeRead, tabFilter)
+ readDataFileUTF8, safeRead, tabFilter,
+ eastAsianLineBreakFilter)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (toEntities)
import Text.Printf
@@ -133,11 +135,11 @@ convertWithOpts opts = do
Nothing -> return Nothing
Just fp -> Just <$> UTF8.readFile fp
- let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css"
let mathMethod =
case (optKaTeXJS opts, optKaTeXStylesheet opts) of
(Nothing, _) -> optHTMLMathMethod opts
- (Just js, ss) -> KaTeX js (fromMaybe csscdn ss)
+ (Just js, ss) -> KaTeX js (fromMaybe
+ (defaultKaTeXURL ++ "katex.min.css") ss)
-- --bibliography implies -F pandoc-citeproc for backwards compatibility:
@@ -181,11 +183,12 @@ convertWithOpts opts = do
let msOutput = format == "ms"
-- disabling the custom writer for now
- writer <- if ".lua" `isSuffixOf` format
+ (writer, writerExts) <-
+ if ".lua" `isSuffixOf` format
-- note: use non-lowercased version writerName
then return (TextWriter
(\o d -> liftIO $ writeCustom writerName o d)
- :: Writer PandocIO)
+ :: Writer PandocIO, mempty)
else case getWriter writerName of
Left e -> E.throwIO $ PandocAppError $
if format == "pdf"
@@ -195,12 +198,13 @@ convertWithOpts opts = do
"\nand specify an output file with " ++
".pdf extension (-o filename.pdf)."
else e
- Right w -> return (w :: Writer PandocIO)
+ Right (w, es) -> return (w :: Writer PandocIO, es)
-- TODO: we have to get the input and the output into the state for
-- the sake of the text2tags reader.
- reader <- case getReader readerName of
- Right r -> return (r :: Reader PandocIO)
+ (reader, readerExts) <-
+ case getReader readerName of
+ Right (r, es) -> return (r :: Reader PandocIO, es)
Left e -> E.throwIO $ PandocAppError e'
where e' = case readerName of
"pdf" -> e ++
@@ -304,11 +308,11 @@ convertWithOpts opts = do
, readerColumns = optColumns opts
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses = optIndentedCodeClasses opts
- , readerApplyMacros = not laTeXOutput
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
+ , readerExtensions = readerExts
}
highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
@@ -339,6 +343,7 @@ convertWithOpts opts = do
writerNumberSections = optNumberSections opts,
writerNumberOffset = optNumberOffset opts,
writerSectionDivs = optSectionDivs opts,
+ writerExtensions = writerExts,
writerReferenceLinks = optReferenceLinks opts,
writerReferenceLocation = optReferenceLocation opts,
writerDpi = optDpi opts,
@@ -354,6 +359,7 @@ convertWithOpts opts = do
writerSlideLevel = optSlideLevel opts,
writerHighlightStyle = highlightStyle,
writerSetextHeaders = optSetextHeaders opts,
+ writerEpubSubdirectory = optEpubSubdirectory opts,
writerEpubMetadata = epubMetadata,
writerEpubFonts = optEpubFonts opts,
writerEpubChapterLevel = optEpubChapterLevel opts,
@@ -375,13 +381,21 @@ convertWithOpts opts = do
"Specify an output file using the -o option."
- let transforms = case optBaseHeaderLevel opts of
- x | x > 1 -> [headerShift (x - 1)]
- | otherwise -> []
+ let transforms = (case optBaseHeaderLevel opts of
+ x | x > 1 -> (headerShift (x - 1) :)
+ | otherwise -> id) $
+ (if extensionEnabled Ext_east_asian_line_breaks
+ readerExts &&
+ not (extensionEnabled Ext_east_asian_line_breaks
+ writerExts &&
+ writerWrapText writerOptions == WrapPreserve)
+ then (eastAsianLineBreakFilter :)
+ else id)
+ []
let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
- then 0
- else optTabStop opts)
+ then 0
+ else optTabStop opts)
readSources :: [FilePath] -> PandocIO Text
readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
@@ -390,6 +404,7 @@ convertWithOpts opts = do
let runIO' :: PandocIO a -> IO a
runIO' f = do
(res, reports) <- runIOorExplode $ do
+ setTrace (optTrace opts)
setVerbosity verbosity
x <- f
rs <- getLog
@@ -518,7 +533,8 @@ externalFilter f args' d = liftIO $ do
(exitcode, outbs) <- E.handle filterException $
pipeProcess env' f' args'' $ encode d
case exitcode of
- ExitSuccess -> return $ either error id $ eitherDecode' outbs
+ ExitSuccess -> either (E.throwIO . PandocFilterError f)
+ return $ eitherDecode' outbs
ExitFailure ec -> E.throwIO $ PandocFilterError f
("Filter returned error status " ++ show ec)
where filterException :: E.SomeException -> IO a
@@ -550,6 +566,7 @@ data Opt = Opt
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optAbbreviations :: Maybe FilePath -- ^ Path to abbrevs file
, optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc
+ , optEpubSubdirectory :: String -- ^ EPUB subdir in OCF container
, optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata
, optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed
, optEpubChapterLevel :: Int -- ^ Header level at which to split chapters
@@ -558,6 +575,7 @@ data Opt = Opt
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output
+ , optTrace :: Bool -- ^ Enable tracing
, optLogFile :: Maybe FilePath -- ^ File to write JSON log output
, optFailIfWarnings :: Bool -- ^ Fail on warnings
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
@@ -624,6 +642,7 @@ defaultOpts = Opt
, optHTMLMathMethod = PlainMath
, optAbbreviations = Nothing
, optReferenceDoc = Nothing
+ , optEpubSubdirectory = "EPUB"
, optEpubMetadata = Nothing
, optEpubFonts = []
, optEpubChapterLevel = 1
@@ -632,6 +651,7 @@ defaultOpts = Opt
, optDumpArgs = False
, optIgnoreArgs = False
, optVerbosity = WARNING
+ , optTrace = False
, optLogFile = Nothing
, optFailIfWarnings = False
, optReferenceLinks = False
@@ -778,10 +798,16 @@ expandFilterPath mbDatadir fp = liftIO $ do
_ -> return fp
applyLuaFilters :: MonadIO m
- => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
+ => Maybe FilePath -> [FilePath] -> [String] -> Pandoc
+ -> m Pandoc
applyLuaFilters mbDatadir filters args d = do
expandedFilters <- mapM (expandFilterPath mbDatadir) filters
- foldrM ($) d $ map (flip runLuaFilter args) expandedFilters
+ let go f d' = liftIO $ do
+ res <- E.try (runLuaFilter mbDatadir f args d')
+ case res of
+ Right x -> return x
+ Left (LuaException s) -> E.throw (PandocFilterError f s)
+ foldrM ($) d $ map go expandedFilters
applyFilters :: MonadIO m
=> Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc
@@ -968,7 +994,7 @@ options =
templ <- getDefaultTemplate Nothing arg
case templ of
Right t -> UTF8.hPutStr stdout t
- Left e -> error $ show e
+ Left e -> E.throwIO $ PandocAppError (show e)
exitSuccess)
"FORMAT")
"" -- "Print default template for FORMAT"
@@ -1232,6 +1258,13 @@ options =
"FILE")
"" -- "Path of custom reference doc"
+ , Option "" ["epub-subdirectory"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optEpubSubdirectory = arg })
+ "DIRNAME")
+ "" -- "Name of subdirectory for epub content in OCF container"
+
, Option "" ["epub-cover-image"]
(ReqArg
(\arg opt ->
@@ -1355,7 +1388,8 @@ options =
, Option "" ["mathjax"]
(OptArg
(\arg opt -> do
- let url' = fromMaybe "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_CHTML-full" arg
+ let url' = fromMaybe (defaultMathJaxURL ++
+ "MathJax.js?config=TeX-AMS_CHTML-full") arg
return opt { optHTMLMathMethod = MathJax url'})
"URL")
"" -- "Use MathJax for HTML math"
@@ -1364,7 +1398,7 @@ options =
(\arg opt ->
return opt
{ optKaTeXJS =
- arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.js"})
+ arg <|> Just (defaultKaTeXURL ++ "katex.min.js")})
"URL")
"" -- Use KaTeX for HTML Math
@@ -1388,7 +1422,7 @@ options =
, Option "" ["trace"]
(NoArg
- (\opt -> return opt { optVerbosity = DEBUG }))
+ (\opt -> return opt { optTrace = True }))
"" -- "Turn on diagnostic tracing in readers."
, Option "" ["dump-args"]
@@ -1530,6 +1564,8 @@ handleUnrecognizedOption :: String -> [String] -> [String]
handleUnrecognizedOption "--smart" =
(("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++
"For example: pandoc -f markdown+smart -t markdown-smart.") :)
+handleUnrecognizedOption "--normalize" =
+ ("--normalize has been removed. Normalization is now automatic." :)
handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
handleUnrecognizedOption "--old-dashes" =
("--old-dashes has been removed. Use +old_dashes extension instead." :)
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
new file mode 100644
index 000000000..b4b55c5d4
--- /dev/null
+++ b/src/Text/Pandoc/BCP47.hs
@@ -0,0 +1,139 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.BCP47
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing and rendering BCP47 language identifiers.
+-}
+module Text.Pandoc.BCP47 (
+ getLang
+ , toLang
+ , parseBCP47
+ , Lang(..)
+ , renderLang
+ )
+where
+import Control.Monad (guard)
+import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower,
+ isAlphaNum)
+import Data.List (intercalate)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import qualified Text.Parsec as P
+
+-- | Represents BCP 47 language/country code.
+data Lang = Lang{ langLanguage :: String
+ , langScript :: String
+ , langRegion :: String
+ , langVariants :: [String] }
+ deriving (Eq, Ord, Show)
+
+-- | Render a Lang as BCP 47.
+renderLang :: Lang -> String
+renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
+ ([langScript lang, langRegion lang] ++ langVariants lang))
+
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: WriterOptions -> Meta -> Maybe String
+getLang opts meta =
+ case lookup "lang" (writerVariables opts) of
+ Just s -> Just s
+ _ ->
+ case lookupMeta "lang" meta of
+ Just (MetaInlines [Str s]) -> Just s
+ Just (MetaString s) -> Just s
+ _ -> Nothing
+
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+ case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
+
+-- | Parse a BCP 47 string as a Lang. Currently we parse
+-- extensions and private-use fields as "variants," even
+-- though officially they aren't.
+parseBCP47 :: String -> Either String Lang
+parseBCP47 lang =
+ case P.parse bcp47 "lang" lang of
+ Right r -> Right r
+ Left e -> Left $ show e
+ where bcp47 = do
+ language <- pLanguage
+ script <- P.option "" pScript
+ region <- P.option "" pRegion
+ variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
+ P.eof
+ return $ Lang{ langLanguage = language
+ , langScript = script
+ , langRegion = region
+ , langVariants = variants }
+ asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
+ pLanguage = do
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return $ map toLower cs
+ pScript = P.try $ do
+ P.char '-'
+ x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
+ xs <- P.count 3
+ (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
+ return $ map toLower (x:xs)
+ pRegion = P.try $ do
+ P.char '-'
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return $ map toUpper cs
+ pVariant = P.try $ do
+ P.char '-'
+ ds <- P.option "" (P.count 1 P.digit)
+ cs <- P.many1 asciiLetter
+ let var = ds ++ cs
+ guard $ if null ds
+ then length var >= 5 && length var <= 8
+ else length var == 4
+ return $ map toLower var
+ pExtension = P.try $ do
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ length cs >= 2 && length cs <= 8
+ return $ map toLower cs
+ pPrivateUse = P.try $ do
+ P.char '-'
+ P.char 'x'
+ P.char '-'
+ cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
+ guard $ length cs >= 1 && length cs <= 8
+ let var = "x-" ++ cs
+ return $ map toLower var
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 14a0b8044..120ba8fee 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Class ( PandocMonad(..)
, getZonedTime
, readFileFromDirs
, report
+ , setTrace
, getLog
, setVerbosity
, getMediaBag
@@ -78,7 +79,7 @@ import qualified Text.Pandoc.Shared as IO ( readDataFile
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Compat.Time (UTCTime)
import Text.Pandoc.Logging
-import Text.Parsec (ParsecT)
+import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
import qualified Text.Pandoc.Compat.Time as IO (getCurrentTime)
import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
import Text.Pandoc.Definition
@@ -107,7 +108,7 @@ import qualified System.FilePath.Glob as IO (glob)
import qualified System.Directory as IO (getModificationTime)
import Control.Monad as M (fail)
import Control.Monad.Reader (ReaderT)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Control.Monad.Except
import Control.Monad.Writer (WriterT)
import Control.Monad.RWS (RWST)
@@ -117,6 +118,7 @@ import System.IO.Error
import System.IO (stderr)
import qualified Data.Map as M
import Text.Pandoc.Error
+import qualified Debug.Trace
class (Functor m, Applicative m, Monad m, MonadError PandocError m)
=> PandocMonad m where
@@ -140,6 +142,11 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m)
modifyCommonState :: (CommonState -> CommonState) -> m ()
modifyCommonState f = getCommonState >>= putCommonState . f
+ trace :: String -> m ()
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ())
+
logOutput :: LogMessage -> m ()
-- Functions defined for all PandocMonad instances
@@ -155,10 +162,11 @@ report :: PandocMonad m => LogMessage -> m ()
report msg = do
verbosity <- getsCommonState stVerbosity
let level = messageVerbosity msg
- when (level <= verbosity) $
- logOutput msg
- unless (level == DEBUG) $
- modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+ when (level <= verbosity) $ logOutput msg
+ modifyCommonState $ \st -> st{ stLog = msg : stLog st }
+
+setTrace :: PandocMonad m => Bool -> m ()
+setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing}
setMediaBag :: PandocMonad m => MediaBag -> m ()
setMediaBag mb = modifyCommonState $ \st -> st{stMediaBag = mb}
@@ -208,6 +216,7 @@ data CommonState = CommonState { stLog :: [LogMessage]
, stOutputFile :: Maybe FilePath
, stResourcePath :: [FilePath]
, stVerbosity :: Verbosity
+ , stTrace :: Bool
}
instance Default CommonState where
@@ -217,6 +226,7 @@ instance Default CommonState where
, stOutputFile = Nothing
, stResourcePath = ["."]
, stVerbosity = WARNING
+ , stTrace = False
}
runIO :: PandocIO a -> IO (Either PandocError a)
@@ -561,8 +571,20 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where
getModificationTime = lift . getModificationTime
getCommonState = lift getCommonState
putCommonState = lift . putCommonState
+ trace msg = do
+ tracing <- getsCommonState stTrace
+ when tracing $ do
+ pos <- getPosition
+ Debug.Trace.trace
+ ("[trace] Parsed " ++ msg ++ " at line " ++
+ show (sourceLine pos) ++
+ if sourceName pos == "chunk"
+ then " of chunk"
+ else "")
+ (return ())
logOutput = lift . logOutput
+
instance PandocMonad m => PandocMonad (ReaderT r m) where
lookupEnv = lift . lookupEnv
getCurrentTime = lift getCurrentTime
diff --git a/src/Text/Pandoc/Compat/Time.hs b/src/Text/Pandoc/Compat/Time.hs
index 1de197801..b1cde82a4 100644
--- a/src/Text/Pandoc/Compat/Time.hs
+++ b/src/Text/Pandoc/Compat/Time.hs
@@ -27,4 +27,4 @@ where
import Data.Time
import System.Locale ( defaultTimeLocale )
-#endif \ No newline at end of file
+#endif
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 3cf381168..24186720c 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -61,7 +61,10 @@ data PandocError = PandocIOError String IOError
| PandocFilterError String String
| PandocCouldNotFindDataFileError String
| PandocResourceNotFound String
+ | PandocTemplateError String
| PandocAppError String
+ | PandocEpubSubdirectoryError String
+ | PandocMacroLoop String
deriving (Show, Typeable, Generic)
instance Exception PandocError
@@ -101,7 +104,12 @@ handleError (Left e) =
"Could not find data file " ++ fn
PandocResourceNotFound fn -> err 99 $
"File " ++ fn ++ " not found in resource path"
+ PandocTemplateError s -> err 5 s
PandocAppError s -> err 1 s
+ PandocEpubSubdirectoryError s -> err 31 $
+ "EPUB subdirectory name '" ++ s ++ "' contains illegal characters"
+ PandocMacroLoop s -> err 91 $
+ "Loop encountered in expanding macro " ++ s
err :: Int -> String -> IO a
err exitCode msg = do
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 58e8c414d..28459d4e6 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -45,7 +45,7 @@ module Text.Pandoc.Extensions ( Extension(..)
, githubMarkdownExtensions
, multimarkdownExtensions )
where
-import Data.Bits (clearBit, setBit, testBit)
+import Data.Bits (clearBit, setBit, testBit, (.|.))
import Data.Data (Data)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
@@ -55,6 +55,10 @@ import Text.Parsec
newtype Extensions = Extensions Integer
deriving (Show, Read, Eq, Ord, Data, Typeable, Generic)
+instance Monoid Extensions where
+ mempty = Extensions 0
+ mappend (Extensions a) (Extensions b) = Extensions (a .|. b)
+
extensionsFromList :: [Extension] -> Extensions
extensionsFromList = foldr enableExtension emptyExtensions
@@ -94,6 +98,7 @@ data Extension =
| Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
| Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
| Ext_inline_code_attributes -- ^ Allow attributes on inline code
+ | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines
| Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
| Ext_native_spans -- ^ Use Span inlines for contents of <span>
@@ -162,6 +167,7 @@ pandocExtensions = extensionsFromList
, Ext_fenced_code_attributes
, Ext_backtick_code_blocks
, Ext_inline_code_attributes
+ , Ext_raw_attribute
, Ext_markdown_in_html_blocks
, Ext_native_divs
, Ext_native_spans
@@ -238,7 +244,6 @@ githubMarkdownExtensions = extensionsFromList
, Ext_space_in_atx_header
, Ext_intraword_underscores
, Ext_strikeout
- , Ext_hard_line_breaks
, Ext_emoji
, Ext_lists_without_preceding_blankline
, Ext_shortcut_reference_links
@@ -275,6 +280,8 @@ multimarkdownExtensions = extensionsFromList
, Ext_subscript
, Ext_backtick_code_blocks
, Ext_spaced_reference_links
+ -- So far only in dev version of mmd:
+ , Ext_raw_attribute
]
-- | Language extensions to be used with strict markdown.
@@ -311,6 +318,7 @@ getDefaultExtensions "epub2" = getDefaultExtensions "epub"
getDefaultExtensions "epub3" = getDefaultExtensions "epub"
getDefaultExtensions "latex" = extensionsFromList
[Ext_smart,
+ Ext_latex_macros,
Ext_auto_identifiers]
getDefaultExtensions "context" = extensionsFromList
[Ext_smart,
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index da8c775f6..1dcff7470 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -52,7 +52,7 @@ import Text.Pandoc.Definition
import Text.Parsec.Pos
-- | Verbosity level.
-data Verbosity = ERROR | WARNING | INFO | DEBUG
+data Verbosity = ERROR | WARNING | INFO
deriving (Show, Read, Eq, Data, Enum, Ord, Bounded, Typeable, Generic)
instance ToJSON Verbosity where
@@ -63,7 +63,6 @@ instance FromJSON Verbosity where
"ERROR" -> return ERROR
"WARNING" -> return WARNING
"INFO" -> return INFO
- "DEBUG" -> return DEBUG
_ -> mzero
parseJSON _ = mzero
@@ -78,7 +77,7 @@ data LogMessage =
| CircularReference String SourcePos
| ParsingUnescaped String SourcePos
| CouldNotLoadIncludeFile String SourcePos
- | ParsingTrace String SourcePos
+ | MacroAlreadyDefined String SourcePos
| InlineNotRendered Inline
| BlockNotRendered Block
| DocxParserWarning String
@@ -92,7 +91,9 @@ data LogMessage =
| Extracting String
| NoTitleElement String
| NoLangSpecified
+ | InvalidLang String
| CouldNotHighlight String
+ | MissingCharacter String
deriving (Show, Eq, Data, Ord, Typeable, Generic)
instance ToJSON LogMessage where
@@ -150,11 +151,11 @@ instance ToJSON LogMessage where
"source" .= Text.pack (sourceName pos),
"line" .= toJSON (sourceLine pos),
"column" .= toJSON (sourceColumn pos)]
- ParsingTrace s pos ->
- ["contents" .= Text.pack s,
+ MacroAlreadyDefined name pos ->
+ ["name" .= Text.pack name,
"source" .= Text.pack (sourceName pos),
- "line" .= sourceLine pos,
- "column" .= sourceColumn pos]
+ "line" .= toJSON (sourceLine pos),
+ "column" .= toJSON (sourceColumn pos)]
InlineNotRendered il ->
["contents" .= toJSON il]
BlockNotRendered bl ->
@@ -184,8 +185,12 @@ instance ToJSON LogMessage where
NoTitleElement fallback ->
["fallback" .= Text.pack fallback]
NoLangSpecified -> []
+ InvalidLang s ->
+ ["lang" .= Text.pack s]
CouldNotHighlight msg ->
["message" .= Text.pack msg]
+ MissingCharacter msg ->
+ ["message" .= Text.pack msg]
showPos :: SourcePos -> String
showPos pos = sn ++ "line " ++
@@ -225,8 +230,8 @@ showLogMessage msg =
"Parsing unescaped '" ++ s ++ "' at " ++ showPos pos
CouldNotLoadIncludeFile fp pos ->
"Could not load include file '" ++ fp ++ "' at " ++ showPos pos
- ParsingTrace s pos ->
- "Parsing trace at " ++ showPos pos ++ ": " ++ s
+ MacroAlreadyDefined name pos ->
+ "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos
InlineNotRendered il ->
"Not rendering " ++ show il
BlockNotRendered bl ->
@@ -260,8 +265,13 @@ showLogMessage msg =
NoLangSpecified ->
"No value for 'lang' was specified in the metadata.\n" ++
"It is recommended that lang be specified for this format."
+ InvalidLang s ->
+ "Invalid 'lang' value '" ++ s ++ "'.\n" ++
+ "Use an IETF language tag like 'en-US'."
CouldNotHighlight m ->
"Could not highlight code block:\n" ++ m
+ MissingCharacter m ->
+ "Missing character: " ++ m
messageVerbosity:: LogMessage -> Verbosity
messageVerbosity msg =
@@ -275,8 +285,8 @@ messageVerbosity msg =
ReferenceNotFound{} -> WARNING
CircularReference{} -> WARNING
CouldNotLoadIncludeFile{} -> WARNING
+ MacroAlreadyDefined{} -> WARNING
ParsingUnescaped{} -> INFO
- ParsingTrace{} -> DEBUG
InlineNotRendered{} -> INFO
BlockNotRendered{} -> INFO
DocxParserWarning{} -> WARNING
@@ -290,4 +300,6 @@ messageVerbosity msg =
Extracting{} -> INFO
NoTitleElement{} -> WARNING
NoLangSpecified -> INFO
+ InvalidLang{} -> WARNING
CouldNotHighlight{} -> WARNING
+ MissingCharacter{} -> WARNING
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index f74c0e425..22b68d5e0 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,3 +1,8 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright © 2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -15,9 +20,6 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
{- |
Module : Text.Pandoc.Lua
Copyright : Copyright © 2017 Albert Krewinkel
@@ -28,11 +30,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Pandoc lua utils.
-}
-module Text.Pandoc.Lua ( runLuaFilter, pushPandocModule ) where
+module Text.Pandoc.Lua ( LuaException(..),
+ runLuaFilter,
+ pushPandocModule ) where
-import Control.Monad (unless, when, (>=>))
+import Control.Exception
+import Control.Monad (unless, when, (>=>), mplus)
import Control.Monad.Trans (MonadIO (..))
+import Data.Data (toConstr, showConstr, dataTypeOf, dataTypeConstrs, Data)
import Data.Map (Map)
+import Data.Maybe (isJust)
+import Data.Typeable (Typeable)
import Scripting.Lua (LuaState, StackValue (..))
import Text.Pandoc.Definition
import Text.Pandoc.Lua.PandocModule (pushPandocModule)
@@ -42,24 +50,25 @@ import Text.Pandoc.Walk
import qualified Data.Map as Map
import qualified Scripting.Lua as Lua
+newtype LuaException = LuaException String
+ deriving (Show, Typeable)
+
+instance Exception LuaException
+
runLuaFilter :: (MonadIO m)
- => FilePath -> [String] -> Pandoc -> m Pandoc
-runLuaFilter filterPath args pd = liftIO $ do
+ => Maybe FilePath -> FilePath -> [String] -> Pandoc -> m Pandoc
+runLuaFilter datadir filterPath args pd = liftIO $ do
lua <- Lua.newstate
Lua.openlibs lua
- -- create table in registry to store filter functions
- Lua.push lua "PANDOC_FILTER_FUNCTIONS"
- Lua.newtable lua
- Lua.rawset lua Lua.registryindex
-- store module in global "pandoc"
- pushPandocModule lua
+ pushPandocModule datadir lua
Lua.setglobal lua "pandoc"
top <- Lua.gettop lua
status <- Lua.loadfile lua filterPath
- if (status /= 0)
+ if status /= 0
then do
Just luaErrMsg <- Lua.peek lua 1
- error luaErrMsg
+ throwIO (LuaException luaErrMsg)
else do
Lua.call lua 0 Lua.multret
newtop <- Lua.gettop lua
@@ -80,157 +89,91 @@ pushGlobalFilter lua =
*> Lua.rawseti lua (-2) 1
runAll :: [LuaFilter] -> Pandoc -> IO Pandoc
-runAll [] = return
-runAll (x:xs) = walkMWithLuaFilter x >=> runAll xs
+runAll = foldr ((>=>) . walkMWithLuaFilter) return
walkMWithLuaFilter :: LuaFilter -> Pandoc -> IO Pandoc
walkMWithLuaFilter (LuaFilter lua fnMap) =
- walkM (execInlineLuaFilter lua fnMap) >=>
- walkM (execBlockLuaFilter lua fnMap) >=>
- walkM (execMetaLuaFilter lua fnMap) >=>
- walkM (execDocLuaFilter lua fnMap)
+ (if hasOneOf (constructorsFor (dataTypeOf (Str [])))
+ then walkM (tryFilter lua fnMap :: Inline -> IO Inline)
+ else return)
+ >=>
+ (if hasOneOf (constructorsFor (dataTypeOf (Para [])))
+ then walkM (tryFilter lua fnMap :: Block -> IO Block)
+ else return)
+ >=>
+ (case Map.lookup "Meta" fnMap of
+ Just fn -> walkM (\(Pandoc meta blocks) -> do
+ meta' <- runFilterFunction lua fn meta
+ return $ Pandoc meta' blocks)
+ Nothing -> return)
+ >=>
+ (case Map.lookup "Pandoc" fnMap `mplus` Map.lookup "Doc" fnMap of
+ Just fn -> (runFilterFunction lua fn) :: Pandoc -> IO Pandoc
+ Nothing -> return)
+ where hasOneOf = any (\k -> isJust (Map.lookup k fnMap))
+ constructorsFor x = map show (dataTypeConstrs x)
type FunctionMap = Map String LuaFilterFunction
data LuaFilter = LuaFilter LuaState FunctionMap
newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int }
-execDocLuaFilter :: LuaState
- -> FunctionMap
- -> Pandoc -> IO Pandoc
-execDocLuaFilter lua fnMap x = do
- let docFnName = "Doc"
- case Map.lookup docFnName fnMap of
+tryFilter :: (Data a, StackValue a) => LuaState -> FunctionMap -> a -> IO a
+tryFilter lua fnMap x =
+ let filterFnName = showConstr (toConstr x) in
+ case Map.lookup filterFnName fnMap of
Nothing -> return x
- Just fn -> runLuaFilterFunction lua fn x
-
-execMetaLuaFilter :: LuaState
- -> FunctionMap
- -> Pandoc -> IO Pandoc
-execMetaLuaFilter lua fnMap pd@(Pandoc meta blks) = do
- let metaFnName = "Meta"
- case Map.lookup metaFnName fnMap of
- Nothing -> return pd
- Just fn -> do
- meta' <- runLuaFilterFunction lua fn meta
- return $ Pandoc meta' blks
-
-execBlockLuaFilter :: LuaState
- -> FunctionMap
- -> Block -> IO Block
-execBlockLuaFilter lua fnMap x = do
- let tryFilter :: String -> IO Block
- tryFilter filterFnName =
- case Map.lookup filterFnName fnMap of
- Nothing -> return x
- Just fn -> runLuaFilterFunction lua fn x
- case x of
- BlockQuote _ -> tryFilter "BlockQuote"
- BulletList _ -> tryFilter "BulletList"
- CodeBlock _ _ -> tryFilter "CodeBlock"
- DefinitionList _ -> tryFilter "DefinitionList"
- Div _ _ -> tryFilter "Div"
- Header _ _ _ -> tryFilter "Header"
- HorizontalRule -> tryFilter "HorizontalRule"
- LineBlock _ -> tryFilter "LineBlock"
- Null -> tryFilter "Null"
- Para _ -> tryFilter "Para"
- Plain _ -> tryFilter "Plain"
- RawBlock _ _ -> tryFilter "RawBlock"
- OrderedList _ _ -> tryFilter "OrderedList"
- Table _ _ _ _ _ -> tryFilter "Table"
-
-execInlineLuaFilter :: LuaState
- -> FunctionMap
- -> Inline -> IO Inline
-execInlineLuaFilter lua fnMap x = do
- let tryFilter :: String -> IO Inline
- tryFilter filterFnName =
- case Map.lookup filterFnName fnMap of
- Nothing -> return x
- Just fn -> runLuaFilterFunction lua fn x
- let tryFilterAlternatives :: [String] -> IO Inline
- tryFilterAlternatives [] = return x
- tryFilterAlternatives (fnName : alternatives) =
- case Map.lookup fnName fnMap of
- Nothing -> tryFilterAlternatives alternatives
- Just fn -> runLuaFilterFunction lua fn x
- case x of
- Cite _ _ -> tryFilter "Cite"
- Code _ _ -> tryFilter "Code"
- Emph _ -> tryFilter "Emph"
- Image _ _ _ -> tryFilter "Image"
- LineBreak -> tryFilter "LineBreak"
- Link _ _ _ -> tryFilter "Link"
- Math DisplayMath _ -> tryFilterAlternatives ["DisplayMath", "Math"]
- Math InlineMath _ -> tryFilterAlternatives ["InlineMath", "Math"]
- Note _ -> tryFilter "Note"
- Quoted DoubleQuote _ -> tryFilterAlternatives ["DoubleQuoted", "Quoted"]
- Quoted SingleQuote _ -> tryFilterAlternatives ["SingleQuoted", "Quoted"]
- RawInline _ _ -> tryFilter "RawInline"
- SmallCaps _ -> tryFilter "SmallCaps"
- SoftBreak -> tryFilter "SoftBreak"
- Space -> tryFilter "Space"
- Span _ _ -> tryFilter "Span"
- Str _ -> tryFilter "Str"
- Strikeout _ -> tryFilter "Strikeout"
- Strong _ -> tryFilter "Strong"
- Subscript _ -> tryFilter "Subscript"
- Superscript _ -> tryFilter "Superscript"
+ Just fn -> runFilterFunction lua fn x
instance StackValue LuaFilter where
valuetype _ = Lua.TTABLE
push = undefined
peek lua idx = fmap (LuaFilter lua) <$> Lua.peek lua idx
--- | Helper class for pushing a single value to the stack via a lua function.
--- See @pushViaCall@.
-class PushViaFilterFunction a where
- pushViaFilterFunction' :: LuaState -> LuaFilterFunction -> IO () -> Int -> a
-
-instance StackValue a => PushViaFilterFunction (IO a) where
- pushViaFilterFunction' lua lf pushArgs num = do
- pushFilterFunction lua lf
- pushArgs
- Lua.call lua num 1
- mbres <- Lua.peek lua (-1)
- case mbres of
- Nothing -> error $ "Error while trying to get a filter's return "
- ++ "value from lua stack."
- Just res -> res <$ Lua.pop lua 1
-
-instance (StackValue a, PushViaFilterFunction b) =>
- PushViaFilterFunction (a -> b) where
- pushViaFilterFunction' lua lf pushArgs num x =
- pushViaFilterFunction' lua lf (pushArgs *> push lua x) (num + 1)
-
--- | Push an value to the stack via a lua filter function. The function is
--- called with all arguments that are passed to this function and is expected to
--- return a single value.
-runLuaFilterFunction :: PushViaFilterFunction a
- => LuaState -> LuaFilterFunction -> a
-runLuaFilterFunction lua lf = pushViaFilterFunction' lua lf (return ()) 0
+-- | Push a value to the stack via a lua filter function. The filter function is
+-- called with given element as argument and is expected to return an element.
+-- Alternatively, the function can return nothing or nil, in which case the
+-- element is left unchanged.
+runFilterFunction :: StackValue a => LuaState -> LuaFilterFunction -> a -> IO a
+runFilterFunction lua lf x = do
+ pushFilterFunction lua lf
+ Lua.push lua x
+ z <- Lua.pcall lua 1 1 0
+ if (z /= 0)
+ then do
+ msg <- Lua.peek lua (-1)
+ let prefix = "Error while running filter function: "
+ throwIO . LuaException $
+ case msg of
+ Nothing -> prefix ++ "could not read error message"
+ Just msg' -> prefix ++ msg'
+ else do
+ resType <- Lua.ltype lua (-1)
+ case resType of
+ Lua.TNIL -> Lua.pop lua 1 *> return x
+ _ -> do
+ mbres <- Lua.peek lua (-1)
+ case mbres of
+ Nothing -> throwIO $ LuaException
+ ("Error while trying to get a filter's return "
+ ++ "value from lua stack.")
+ Just res -> res <$ Lua.pop lua 1
-- | Push the filter function to the top of the stack.
pushFilterFunction :: Lua.LuaState -> LuaFilterFunction -> IO ()
-pushFilterFunction lua lf = do
+pushFilterFunction lua lf =
-- The function is stored in a lua registry table, retrieve it from there.
- push lua ("PANDOC_FILTER_FUNCTIONS"::String)
- Lua.rawget lua Lua.registryindex
- Lua.rawgeti lua (-1) (functionIndex lf)
- Lua.remove lua (-2) -- remove registry table from stack
+ Lua.rawgeti lua Lua.registryindex (functionIndex lf)
+
+registerFilterFunction :: LuaState -> Int -> IO LuaFilterFunction
+registerFilterFunction lua idx = do
+ isFn <- Lua.isfunction lua idx
+ unless isFn . throwIO . LuaException $ "Not a function at index " ++ show idx
+ Lua.pushvalue lua idx
+ refIdx <- Lua.ref lua Lua.registryindex
+ return $ LuaFilterFunction refIdx
instance StackValue LuaFilterFunction where
valuetype _ = Lua.TFUNCTION
- push lua v = pushFilterFunction lua v
- peek lua i = do
- isFn <- Lua.isfunction lua i
- unless isFn (error $ "Not a function at index " ++ (show i))
- Lua.pushvalue lua i
- push lua ("PANDOC_FILTER_FUNCTIONS"::String)
- Lua.rawget lua Lua.registryindex
- len <- Lua.objlen lua (-1)
- Lua.insert lua (-2)
- Lua.rawseti lua (-2) (len + 1)
- Lua.pop lua 1
- return . Just $ LuaFilterFunction (len + 1)
+ push = pushFilterFunction
+ peek = fmap (fmap Just) . registerFilterFunction
diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs
index 27c19d4f0..2d0baf4f8 100644
--- a/src/Text/Pandoc/Lua/PandocModule.hs
+++ b/src/Text/Pandoc/Lua/PandocModule.hs
@@ -34,15 +34,16 @@ import Data.Text (pack)
import Scripting.Lua (LuaState, call, push, pushhsfunction, rawset)
import Text.Pandoc.Class hiding (readDataFile)
import Text.Pandoc.Definition (Pandoc)
+import Text.Pandoc.Options (ReaderOptions(readerExtensions))
import Text.Pandoc.Lua.Compat (loadstring)
import Text.Pandoc.Lua.StackInstances ()
import Text.Pandoc.Readers (Reader (..), getReader)
import Text.Pandoc.Shared (readDataFile)
-- | Push the "pandoc" on the lua stack.
-pushPandocModule :: LuaState -> IO ()
-pushPandocModule lua = do
- script <- pandocModuleScript
+pushPandocModule :: Maybe FilePath -> LuaState -> IO ()
+pushPandocModule datadir lua = do
+ script <- pandocModuleScript datadir
status <- loadstring lua script "pandoc.lua"
unless (status /= 0) $ call lua 0 1
push lua "__read"
@@ -50,17 +51,17 @@ pushPandocModule lua = do
rawset lua (-3)
-- | Get the string representation of the pandoc module
-pandocModuleScript :: IO String
-pandocModuleScript = unpack <$> readDataFile Nothing "pandoc.lua"
+pandocModuleScript :: Maybe FilePath -> IO String
+pandocModuleScript datadir = unpack <$> readDataFile datadir "pandoc.lua"
read_doc :: String -> String -> IO (Either String Pandoc)
read_doc formatSpec content = do
case getReader formatSpec of
Left s -> return $ Left s
- Right reader ->
+ Right (reader, es) ->
case reader of
TextReader r -> do
- res <- runIO $ r def (pack content)
+ res <- runIO $ r def{ readerExtensions = es } (pack content)
case res of
Left s -> return . Left $ show s
Right pd -> return $ Right pd
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c7211c86e..d7e77010e 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -61,7 +61,6 @@ data ReaderOptions = ReaderOptions{
, readerStandalone :: Bool -- ^ Standalone document with header
, readerColumns :: Int -- ^ Number of columns in terminal
, readerTabStop :: Int -- ^ Tab stop
- , readerApplyMacros :: Bool -- ^ Apply macros to TeX math
, readerIndentedCodeClasses :: [String] -- ^ Default classes for
-- indented code blocks
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
@@ -75,7 +74,6 @@ instance Default ReaderOptions
, readerStandalone = False
, readerColumns = 80
, readerTabStop = 4
- , readerApplyMacros = True
, readerIndentedCodeClasses = []
, readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
@@ -213,6 +211,7 @@ data WriterOptions = WriterOptions
, writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting
-- (Nothing = no highlighting)
, writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
+ , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF
, writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB
, writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed
, writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
@@ -249,6 +248,7 @@ instance Default WriterOptions where
, writerListings = False
, writerHighlightStyle = Just pygments
, writerSetextHeaders = True
+ , writerEpubSubdirectory = "EPUB"
, writerEpubMetadata = Nothing
, writerEpubFonts = []
, writerEpubChapterLevel = 1
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index cd75d869d..25a94972a 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -40,7 +40,6 @@ import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.ByteString as BS
import Data.ByteString.Lazy (ByteString)
-import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BC
import Data.Maybe (fromMaybe)
@@ -197,7 +196,22 @@ tex2pdf' verbosity args tmpDir program source = do
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
- (ExitSuccess, Just pdf) -> return $ Right pdf
+ (ExitSuccess, Just pdf) -> do
+ missingCharacterWarnings verbosity log'
+ return $ Right pdf
+
+missingCharacterWarnings :: Verbosity -> ByteString -> IO ()
+missingCharacterWarnings verbosity log' = do
+ let ls = BC.lines log'
+ let isMissingCharacterWarning = BC.isPrefixOf "Missing character: "
+ let warnings = [ UTF8.toStringLazy (BC.drop 19 l)
+ | l <- ls
+ , isMissingCharacterWarning l
+ ]
+ runIO $ do
+ setVerbosity verbosity
+ mapM_ (report . MissingCharacter) warnings
+ return ()
-- parsing output
@@ -255,12 +269,12 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
mapM_ print env''
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
- B.readFile file' >>= B.putStr
+ BL.readFile file' >>= BL.putStr
putStr "\n"
(exit, out) <- pipeProcess (Just env'') program programArgs BL.empty
when (verbosity >= INFO) $ do
putStrLn $ "[makePDF] Run #" ++ show runNumber
- B.hPutStr stdout out
+ BL.hPutStr stdout out
putStr "\n"
if runNumber <= numRuns
then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
@@ -271,9 +285,16 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
-- We read PDF as a strict bytestring to make sure that the
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
else return Nothing
- return (exit, out, pdf)
+ -- Note that some things like Missing character warnings
+ -- appear in the log but not on stderr, so we prefer the log:
+ let logFile = replaceExtension file ".log"
+ logExists <- doesFileExist logFile
+ log' <- if logExists
+ then BL.readFile logFile
+ else return out
+ return (exit, log', pdf)
ms2pdf :: Verbosity
-> [String]
@@ -294,7 +315,7 @@ ms2pdf verbosity args source = do
(exit, out) <- pipeProcess (Just env') "pdfroff" args
(BL.fromStrict $ UTF8.fromText source)
when (verbosity >= INFO) $ do
- B.hPutStr stdout out
+ BL.hPutStr stdout out
putStr "\n"
return $ case exit of
ExitFailure _ -> Left out
@@ -318,12 +339,12 @@ html2pdf verbosity args source = do
mapM_ print env'
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
+ BL.readFile file >>= BL.putStr
putStr "\n"
(exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty
removeFile file
when (verbosity >= INFO) $ do
- B.hPutStr stdout out
+ BL.hPutStr stdout out
putStr "\n"
pdfExists <- doesFileExist pdfFile
mbPdf <- if pdfExists
@@ -331,7 +352,7 @@ html2pdf verbosity args source = do
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
then do
- res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ res <- (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
removeFile pdfFile
return res
else return Nothing
@@ -365,11 +386,11 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
mapM_ print env'
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- B.readFile file >>= B.putStr
+ BL.readFile file >>= BL.putStr
putStr "\n"
(exit, out) <- pipeProcess (Just env') "context" programArgs BL.empty
when (verbosity >= INFO) $ do
- B.hPutStr stdout out
+ BL.hPutStr stdout out
putStr "\n"
let pdfFile = replaceExtension file ".pdf"
pdfExists <- doesFileExist pdfFile
@@ -377,7 +398,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
-- We read PDF as a strict bytestring to make sure that the
-- temp directory is removed on Windows.
-- See https://github.com/jgm/pandoc/issues/1192.
- then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ then (Just . BL.fromChunks . (:[])) `fmap` BS.readFile pdfFile
else return Nothing
case (exit, mbPdf) of
(ExitFailure _, _) -> do
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index cd51bff69..549042d14 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -35,7 +35,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
A utility library with parsers used in pandoc readers.
-}
-module Text.Pandoc.Parsing ( anyLine,
+module Text.Pandoc.Parsing ( takeWhileP,
+ takeP,
+ anyLine,
anyLineNewline,
indentWith,
many1Till,
@@ -109,8 +111,6 @@ module Text.Pandoc.Parsing ( anyLine,
dash,
nested,
citeKey,
- macro,
- applyMacros',
Parser,
ParserT,
F,
@@ -130,6 +130,7 @@ module Text.Pandoc.Parsing ( anyLine,
runParser,
runParserT,
parse,
+ tokenPrim,
anyToken,
getInput,
setInput,
@@ -178,24 +179,27 @@ module Text.Pandoc.Parsing ( anyLine,
sourceLine,
setSourceColumn,
setSourceLine,
- newPos
+ newPos,
+ Line,
+ Column
)
where
+import Data.Text (Text)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..), trimInlines)
+import Text.Pandoc.Builder (Blocks, Inlines, HasMeta(..), trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec hiding (token)
-import Text.Parsec.Pos (newPos)
+import Text.Parsec.Pos (newPos, initialPos, updatePosString)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace, isPunctuation )
import Data.List ( intercalate, transpose, isSuffixOf )
import Text.Pandoc.Shared
import qualified Data.Map as M
-import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro, pMacroDefinition)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.HTML.TagSoup.Entity ( lookupEntity )
import Text.Pandoc.Asciify (toAsciiChar)
import Data.Monoid ((<>))
@@ -242,6 +246,35 @@ instance Monoid a => Monoid (Future s a) where
mappend = liftM2 mappend
mconcat = liftM mconcat . sequence
+-- | Parse characters while a predicate is true.
+takeWhileP :: Stream [Char] m Char
+ => (Char -> Bool) -> ParserT [Char] st m [Char]
+takeWhileP f = do
+ -- faster than 'many (satisfy f)'
+ inp <- getInput
+ pos <- getPosition
+ let (xs, rest) = span f inp
+ -- needed to persuade parsec that this won't match an empty string:
+ anyChar
+ setInput rest
+ setPosition $ updatePosString pos xs
+ return xs
+
+-- Parse n characters of input (or the rest of the input if
+-- there aren't n characters).
+takeP :: Stream [Char] m Char => Int -> ParserT [Char] st m [Char]
+takeP n = do
+ guard (n > 0)
+ -- faster than 'count n anyChar'
+ inp <- getInput
+ pos <- getPosition
+ let (xs, rest) = splitAt n inp
+ -- needed to persuade parsec that this won't match an empty string:
+ anyChar
+ setInput rest
+ setPosition $ updatePosString pos xs
+ return xs
+
-- | Parse any line of text
anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
anyLine = do
@@ -366,6 +399,7 @@ parseFromString :: Monad m
-> ParserT String st m a
parseFromString parser str = do
oldPos <- getPosition
+ setPosition $ initialPos "chunk"
oldInput <- getInput
setInput str
result <- parser
@@ -993,7 +1027,7 @@ data ParserState = ParserState
stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
stateNextExample :: Int, -- ^ Number of next example
stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateMacros :: [Macro], -- ^ List of macros defined so far
+ stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
@@ -1056,8 +1090,8 @@ instance HasIdentifierList ParserState where
updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st }
class HasMacros st where
- extractMacros :: st -> [Macro]
- updateMacros :: ([Macro] -> [Macro]) -> st -> st
+ extractMacros :: st -> M.Map Text Macro
+ updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
instance HasMacros ParserState where
extractMacros = stateMacros
@@ -1111,7 +1145,7 @@ defaultParserState =
stateIdentifiers = Set.empty,
stateNextExample = 1,
stateExamples = M.empty,
- stateMacros = [],
+ stateMacros = M.empty,
stateRstDefaultRole = "title-reference",
stateRstCustomRoles = M.empty,
stateCaption = Nothing,
@@ -1340,33 +1374,6 @@ token :: (Stream s m t)
-> ParsecT s st m a
token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
---
--- Macros
---
-
--- | Parse a \newcommand or \newenviroment macro definition.
-macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
- => ParserT [Char] st m Blocks
-macro = do
- apply <- getOption readerApplyMacros
- (m, def') <- withRaw pMacroDefinition
- if apply
- then do
- updateState $ \st -> updateMacros (m:) st
- return mempty
- else return $ rawBlock "latex" def'
-
--- | Apply current macros to string.
-applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
- => String
- -> ParserT [Char] st m String
-applyMacros' target = do
- apply <- getOption readerApplyMacros
- if apply
- then do macros <- extractMacros <$> getState
- return $ applyMacros macros target
- else return target
-
infixr 5 <+?>
(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
@@ -1384,10 +1391,11 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
Functor mf, Applicative mf, Monad mf)
- => ParserT String st m (mf Blocks)
+ => ParserT [a] st m (mf Blocks)
+ -> (String -> [a])
-> [FilePath] -> FilePath
- -> ParserT String st m (mf Blocks)
-insertIncludedFile' blocks dirs f = do
+ -> ParserT [a] st m (mf Blocks)
+insertIncludedFile' blocks totoks dirs f = do
oldPos <- getPosition
oldInput <- getInput
containers <- getIncludeFiles <$> getState
@@ -1401,7 +1409,7 @@ insertIncludedFile' blocks dirs f = do
report $ CouldNotLoadIncludeFile f oldPos
return ""
setPosition $ newPos f 1 1
- setInput contents
+ setInput $ totoks contents
bs <- blocks
setInput oldInput
setPosition oldPos
@@ -1411,11 +1419,12 @@ insertIncludedFile' blocks dirs f = do
-- | Parse content of include file as blocks. Circular includes result in an
-- @PandocParseError@.
insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
- => ParserT String st m Blocks
+ => ParserT [a] st m Blocks
+ -> (String -> [a])
-> [FilePath] -> FilePath
- -> ParserT String st m Blocks
-insertIncludedFile blocks dirs f =
- runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f
+ -> ParserT [a] st m Blocks
+insertIncludedFile blocks totoks dirs f =
+ runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-- | Parse content of include file as future blocks. Circular includes result in
-- an @PandocParseError@.
@@ -1423,4 +1432,4 @@ insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
=> ParserT String st m (Future st Blocks)
-> [FilePath] -> FilePath
-> ParserT String st m (Future st Blocks)
-insertIncludedFileF = insertIncludedFile'
+insertIncludedFileF p = insertIncludedFile' p id
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index d78a2f1d9..1b3c647a1 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -77,7 +77,7 @@ module Text.Pandoc.Pretty (
)
where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (isSpace)
import Data.Foldable (toList)
import Data.List (intersperse)
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 004fefe25..0374d27d5 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -46,6 +46,7 @@ module Text.Pandoc.Readers
, readMarkdown
, readCommonMark
, readMediaWiki
+ , readVimwiki
, readRST
, readOrg
, readLaTeX
@@ -59,6 +60,7 @@ module Text.Pandoc.Readers
, readTWiki
, readTxt2Tags
, readEPUB
+ , readMuse
-- * Miscellaneous
, getReader
, getDefaultExtensions
@@ -81,6 +83,8 @@ import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
+import Text.Pandoc.Readers.Vimwiki
+import Text.Pandoc.Readers.Muse
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Odt
import Text.Pandoc.Readers.OPML
@@ -113,6 +117,7 @@ readers = [ ("native" , TextReader readNative)
,("commonmark" , TextReader readCommonMark)
,("rst" , TextReader readRST)
,("mediawiki" , TextReader readMediaWiki)
+ ,("vimwiki" , TextReader readVimwiki)
,("docbook" , TextReader readDocBook)
,("opml" , TextReader readOPML)
,("org" , TextReader readOrg)
@@ -125,22 +130,19 @@ readers = [ ("native" , TextReader readNative)
,("odt" , ByteStringReader readOdt)
,("t2t" , TextReader readTxt2Tags)
,("epub" , ByteStringReader readEPUB)
+ ,("muse" , TextReader readMuse)
]
--- | Retrieve reader based on formatSpec (format+extensions).
-getReader :: PandocMonad m => String -> Either String (Reader m)
+-- | Retrieve reader, extensions based on formatSpec (format+extensions).
+getReader :: PandocMonad m => String -> Either String (Reader m, Extensions)
getReader s =
case parseFormatSpec s of
Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
Right (readerName, setExts) ->
case lookup readerName readers of
Nothing -> Left $ "Unknown reader: " ++ readerName
- Just (TextReader r) -> Right $ TextReader $ \o ->
- r o{ readerExtensions = setExts $
- getDefaultExtensions readerName }
- Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
- r o{ readerExtensions = setExts $
- getDefaultExtensions readerName }
+ Just r -> Right (r, setExts $
+ getDefaultExtensions readerName)
-- | Read pandoc document from JSON format.
readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index bd3c7c356..c1e4d742c 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,6 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Data.Char (toUpper)
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, crFilter)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
@@ -9,7 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Data.Either (rights)
import Data.Generics
import Data.Char (isSpace)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
@@ -526,7 +526,8 @@ instance Default DBState where
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp
+ let tree = normalizeTree . parseXML . handleInstructions
+ $ T.unpack $ crFilter inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 2757314ab..21aa358f2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -76,7 +76,7 @@ module Text.Pandoc.Readers.Docx
import Codec.Archive.Zip
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index e6736100f..24615ba94 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -58,7 +58,7 @@ import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import Data.Char (chr, isDigit, ord, readLitChar)
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
index 38f976fd8..b32a73770 100644
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -7,7 +7,7 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
, hasStyleName
) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (toLower)
import qualified Data.Map as M
import Text.Pandoc.Readers.Docx.Util
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 94f933c4d..734973e33 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, addMetaField
- , escapeURI, safeRead )
+ , escapeURI, safeRead, crFilter )
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
Extension (Ext_epub_html_exts,
Ext_raw_html, Ext_native_divs, Ext_native_spans))
@@ -53,6 +53,7 @@ import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import qualified Data.Map as M
+import Data.Foldable ( for_ )
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isPrefixOf )
import Data.Char ( isDigit, isLetter, isAlphaNum )
@@ -71,7 +72,7 @@ import Data.Monoid ((<>))
import Text.Parsec.Error
import qualified Data.Set as Set
import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Control.Monad.Except (throwError)
-- | Convert HTML-formatted string to 'Pandoc' document.
@@ -82,7 +83,7 @@ readHtml :: PandocMonad m
readHtml opts inp = do
let tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- inp
+ (crFilter inp)
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
type TagParser m = HTMLParser m [Tag Text]
+pHtml :: PandocMonad m => TagParser m Blocks
+pHtml = try $ do
+ (TagOpen "html" attr) <- lookAhead $ pAnyTag
+ for_ (lookup "lang" attr) $
+ updateState . B.setMeta "lang" . B.text . T.unpack
+ pInTags "html" block
+
pBody :: PandocMonad m => TagParser m Blocks
pBody = pInTags "body" block
@@ -162,7 +170,6 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
block :: PandocMonad m => TagParser m Blocks
block = do
- pos <- getPosition
res <- choice
[ eSection
, eSwitch B.para block
@@ -176,13 +183,14 @@ block = do
, pList
, pHrule
, pTable
+ , pHtml
, pHead
, pBody
, pDiv
, pPlain
, pRawHtmlBlock
]
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
@@ -797,6 +805,8 @@ pCloses tagtype = try $ do
(TagClose "dl") | tagtype == "dd" -> return ()
(TagClose "table") | tagtype == "td" -> return ()
(TagClose "table") | tagtype == "tr" -> return ()
+ (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags
+ -> return () -- see #3794
_ -> mzero
pTagText :: PandocMonad m => TagParser m Inlines
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index b22b71b96..a09ed8be9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -27,7 +27,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (splitBy, trim)
+import Text.Pandoc.Shared (splitBy, trim, crFilter)
-- | Parse Haddock markup and return a 'Pandoc' document.
@@ -35,7 +35,7 @@ readHaddock :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack s) of
+readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 17fb48548..a9bafb03b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
@@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
+
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ applyMacros,
rawLaTeXInline,
rawLaTeXBlock,
- inlineCommand,
+ macro,
+ inlineCommand
) where
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (chr, isAlphaNum, isLetter, ord)
-import Data.Text (Text, unpack)
+import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe (fromMaybe, maybeToList)
import Safe (minimumDef)
import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional,
- space, (<|>))
+import Text.Pandoc.Parsing hiding (many, optional, withRaw,
+ mathInline, mathDisplay,
+ space, (<|>), spaces, blankline)
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..),
+ TokType(..))
import Text.Pandoc.Walk
+import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop))
+
+-- for debugging:
+-- import Text.Pandoc.Extensions (getDefaultExtensions)
+-- import Text.Pandoc.Class (runIOorExplode, PandocIO)
+-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
@@ -63,17 +81,18 @@ readLaTeX :: PandocMonad m
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readLaTeX opts ltx = do
- parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx)
+ parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
+ (tokenize (crFilter ltx))
case parsed of
Right result -> return result
- Left e -> throwError e
+ Left e -> throwError $ PandocParsecError (T.unpack ltx) e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
- let meta = stateMeta st
+ let meta = sMeta st
let doc' = doc bs
let headerLevel (Header n _ _) = [n]
headerLevel _ = []
@@ -87,177 +106,476 @@ parseLaTeX = do
else id) doc'
return $ Pandoc meta bs'
-type LP m = ParserT String ParserState m
-
-anyControlSeq :: PandocMonad m => LP m String
-anyControlSeq = do
- char '\\'
- next <- option '\n' anyChar
- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
-
-controlSeq :: PandocMonad m => String -> LP m String
-controlSeq name = try $ do
- char '\\'
- case name of
- "" -> mzero
- [c] | not (isLetter c) -> string [c]
- cs -> string cs <* notFollowedBy letter <* optional sp
- return name
-
-dimenarg :: PandocMonad m => LP m String
-dimenarg = try $ do
- ch <- option "" $ string "="
- num <- many1 digit
- dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- return $ ch ++ num ++ dim
+-- testParser :: LP PandocIO a -> Text -> IO a
+-- testParser p t = do
+-- res <- runIOorExplode (runParserT p defaultLaTeXState{
+-- sOptions = def{ readerExtensions =
+-- enableExtension Ext_raw_tex $
+-- getDefaultExtensions "latex" }} "source" (tokenize t))
+-- case res of
+-- Left e -> error (show e)
+-- Right r -> return r
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: Maybe Inlines
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = Nothing
+ , sInListItem = False
+ , sInTableCell = False
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXBlock = do
+ lookAhead (try (char '\\' >> letter))
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- try $
+ withRaw (environment <|> macroDef <|> blockCommand)
+ return raw
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right raw -> takeP (T.length (untokenize raw))
+
+macro :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m Blocks
+macro = do
+ guardEnabled Ext_latex_macros
+ lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *>
+ oneOfStrings ["command", "environment"])
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- withRaw $ try macroDef
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, st) -> do
+ updateState (updateMacros (const $ sMacros st))
+ mempty <$ takeP (T.length (untokenize raw))
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = do
+ (guardEnabled Ext_latex_macros >>
+ do let retokenize = doMacros 0 *> (toksToString <$> getInput)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s') <|> return s
+
+rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXInline = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ takeP (T.length (untokenize raw))
+
+inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (il, raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (il, raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ takeP (T.length (untokenize raw))
+ return il
+
+tokenize :: Text -> [Tok]
+tokenize = totoks (1, 1)
+
+totoks :: (Line, Column) -> Text -> [Tok]
+totoks (lin,col) t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok (lin, col) Newline "\n"
+ : totoks (lin + 1,1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok (lin, col) Spaces sps
+ : totoks (lin, col + T.length sps) rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok (lin, col) Word ws
+ : totoks (lin, col + T.length ws) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok (lin, col) Comment ("%" <> cs)
+ : totoks (lin, col + 1 + T.length cs) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok (lin, col) Symbol (T.singleton c)]
+ Just (d, rest')
+ | isLetter d ->
+ let (ws, rest'') = T.span isLetter rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (lin,
+ col + 1 + T.length ws + T.length ss) rest'''
+ | d == '\t' || d == '\n' ->
+ Tok (lin, col) Symbol ("\\")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (lin, col + 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok (lin, col) (Arg i) ("#" <> t1)
+ : totoks (lin, col + 1 + T.length t1) t2
+ Nothing ->
+ Tok (lin, col) Symbol ("#")
+ : totoks (lin, col + 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok (lin, col) Esc2 (T.pack ['^','^',d,e])
+ : totoks (lin, col + 4) rest'''
+ _ ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ | d < '\128' ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ _ -> [Tok (lin, col) Symbol ("^"),
+ Tok (lin, col + 1) Symbol ("^")]
+ _ -> Tok (lin, col) Symbol ("^")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest
+
+ where isSpaceOrTab ' ' = True
+ isSpaceOrTab '\t' = True
+ isSpaceOrTab _ = False
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos spos _ (Tok (lin,col) _ _ : _) =
+ setSourceColumn (setSourceLine spos lin) col
+ updatePos spos _ [] = spos
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ when (not verbatimMode) $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro numargs optarg newtoks) -> do
+ setInput ts
+ let getarg = spaces >> braced
+ args <- case optarg of
+ Nothing -> count numargs getarg
+ Just o ->
+ (:) <$> option o bracketedToks
+ <*> count (numargs - 1) getarg
+ let addTok (Tok _ (Arg i) _) acc | i > 0
+ , i <= numargs =
+ map (setpos spos) (args !! (i - 1)) ++ acc
+ addTok t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr addTok ts' newtoks
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+
+setpos :: (Line, Column) -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+ where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+ isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSym
+ where isSym (Tok _ Symbol _) = True
+ isSym _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: PandocMonad m => LP m ()
-whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
-
-endline :: PandocMonad m => LP m ()
-endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
+whitespace = () <$ satisfyTok isSpaceTok
+ where isSpaceTok (Tok _ Spaces _) = True
+ isSpaceTok _ = False
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
-tildeEscape :: PandocMonad m => LP m Char
-tildeEscape = try $ do
- string "^^"
- c <- satisfy (\x -> x >= '\0' && x <= '\128')
- d <- if isLowerHex c
- then option "" $ count 1 (satisfy isLowerHex)
- else return ""
- if null d
- then case ord c of
- x | x >= 64 && x <= 127 -> return $ chr (x - 64)
- | otherwise -> return $ chr (x + 64)
- else return $ chr $ read ('0':'x':c:d)
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
comment :: PandocMonad m => LP m ()
-comment = do
- char '%'
- skipMany (satisfy (/='\n'))
- optional newline
- return ()
+comment = () <$ satisfyTok isCommentTok
+ where isCommentTok (Tok _ Comment _) = True
+ isCommentTok _ = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
-bgroup :: PandocMonad m => LP m ()
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
- skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
- () <$ char '{'
- <|> () <$ controlSeq "bgroup"
- <|> () <$ controlSeq "begingroup"
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-egroup :: PandocMonad m => LP m ()
-egroup = () <$ char '}'
- <|> () <$ controlSeq "egroup"
- <|> () <$ controlSeq "endgroup"
+egroup :: PandocMonad m => LP m Tok
+egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup")
-grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
bgroup
-- first we check for an inner 'grouped', because
-- {{a,b}} should be parsed the same as {a,b}
- try (grouped parser <* egroup)
- <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m String
-braced = grouped chunk
- where chunk =
- many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
- <|> try (string "\\}")
- <|> try (string "\\{")
- <|> try (string "\\\\")
- <|> ((\x -> "{" ++ x ++ "}") <$> braced)
- <|> count 1 anyChar
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' 1
+ where braced' (n :: Int) =
+ handleEgroup n <|> handleBgroup n <|> handleOther n
+ handleEgroup n = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' (n - 1)
+ handleBgroup n = do
+ t <- bgroup
+ (t:) <$> braced' (n + 1)
+ handleOther n = do
+ t <- anyTok
+ (t:) <$> braced' n
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
-
-mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
-mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
-
-mathInline :: PandocMonad m => LP m String -> LP m Inlines
-mathInline p = math <$> (try p >>= applyMacros')
-
-mathChars :: PandocMonad m => LP m String
-mathChars =
- concat <$> many (escapedChar
- <|> (snd <$> withRaw braced)
- <|> many1 (satisfy isOrdChar))
- where escapedChar = try $ do char '\\'
- c <- anyChar
- return ['\\',c]
- isOrdChar '$' = False
- isOrdChar '{' = False
- isOrdChar '}' = False
- isOrdChar '\\' = False
- isOrdChar _ = True
-
-quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
-quoted' f starter ender = do
- startchs <- starter
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then do
- ils <- many (notFollowedBy ender >> inline)
- (ender >> return (f (mconcat ils))) <|>
- (<> mconcat ils) <$>
- lit (case startchs of
- "``" -> "“"
- "`" -> "‘"
- _ -> startchs)
- else lit startchs
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
-doubleQuote :: PandocMonad m => LP m Inlines
-doubleQuote = do
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ (T.take 2 (T.reverse s)) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
-singleQuote :: PandocMonad m => LP m Inlines
-singleQuote = do
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- else str <$> many1 (oneOf "`\'‘’")
+-- inline elements:
-inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> inlineText
- <|> inlineCommand
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (char '-' *> option (str "-")
- (char '-' *> option (str "–") (str "—" <$ char '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (string "''"))
- <|> (str "”" <$ char '”')
- <|> (str "’" <$ char '\'')
- <|> (str "’" <$ char '’')
- <|> (str "\160" <$ char '~')
- <|> mathDisplay (string "$$" *> mathChars <* string "$$")
- <|> mathInline (char '$' *> mathChars <* char '$')
- <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str . (:[]) <$> tildeEscape)
- <|> (do res <- oneOf "#&~^'`\"[]"
- pos <- getPosition
- report $ ParsingUnescaped [res] pos
- return $ str [res])
+word :: PandocMonad m => LP m Inlines
+word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
-inlines :: PandocMonad m => LP m Inlines
-inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+regularSymbol :: PandocMonad m => LP m Inlines
+regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+ where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
+ isRegularSymbol _ = False
+ isSpecial c = c `Set.member` specialChars
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
@@ -268,467 +586,19 @@ inlineGroup = do
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
-block :: PandocMonad m => LP m Blocks
-block = (mempty <$ comment)
- <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
- <|> environment
- <|> include
- <|> macro
- <|> blockCommand
- <|> paragraph
- <|> grouped block
-
-blocks :: PandocMonad m => LP m Blocks
-blocks = mconcat <$> many block
-
-getRawCommand :: PandocMonad m => String -> LP m String
-getRawCommand name' = do
- rawargs <- withRaw (many (try (optional sp *> opt)) *>
- option "" (try (optional sp *> dimenarg)) *>
- many braced)
- return $ '\\' : name' ++ snd rawargs
-
-lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
-lookupListDefault d = (fromMaybe d .) . lookupList
- where
- lookupList l m = msum $ map (`M.lookup` m) l
-
-blockCommand :: PandocMonad m => LP m Blocks
-blockCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*" <* optional sp)
- let name' = name ++ star
- let raw = do
- rawcommand <- getRawCommand name'
- transformed <- applyMacros' rawcommand
- guard $ transformed /= rawcommand
- notFollowedBy $ parseFromString' inlines transformed
- parseFromString' blocks transformed
- lookupListDefault raw [name',name] blockCommands
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
--- eat an optional argument and one or more arguments in braces
-ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
-ignoreInlines name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawInline "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-guardRaw :: PandocMonad m => LP m ()
-guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex
-
-optargs :: PandocMonad m => LP m String
-optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
-ignoreBlocks name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
-blockCommands = M.fromList $
- [ ("par", mempty <$ skipopts)
- , ("parbox", braced >> grouped blocks)
- , ("title", mempty <$ (skipopts *>
- (grouped inline >>= addMeta "title")
- <|> (grouped block >>= addMeta "title")))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
- , ("author", mempty <$ (skipopts *> authors))
- -- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
- , ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- -- Koma-script metadata commands
- , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
- -- sectioning
- , ("part", section nullAttr (-1))
- , ("part*", section nullAttr (-1))
- , ("chapter", section nullAttr 0)
- , ("chapter*", section ("",["unnumbered"],[]) 0)
- , ("section", section nullAttr 1)
- , ("section*", section ("",["unnumbered"],[]) 1)
- , ("subsection", section nullAttr 2)
- , ("subsection*", section ("",["unnumbered"],[]) 2)
- , ("subsubsection", section nullAttr 3)
- , ("subsubsection*", section ("",["unnumbered"],[]) 3)
- , ("paragraph", section nullAttr 4)
- , ("paragraph*", section ("",["unnumbered"],[]) 4)
- , ("subparagraph", section nullAttr 5)
- , ("subparagraph*", section ("",["unnumbered"],[]) 5)
- -- beamer slides
- , ("frametitle", section nullAttr 3)
- , ("framesubtitle", section nullAttr 4)
- -- letters
- , ("opening", (para . trimInlines) <$> (skipopts *> tok))
- , ("closing", skipopts *> closing)
- --
- , ("hrule", pure horizontalRule)
- , ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> looseItem)
- , ("documentclass", skipopts *> braced *> preamble)
- , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> setCaption)
- , ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- , ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- -- includes
- , ("lstinputlisting", inputListing)
- , ("graphicspath", graphicsPath)
- -- hyperlink
- , ("hypertarget", braced >> grouped block)
- -- LaTeX colors
- , ("textcolor", coloredBlock "color")
- , ("colorbox", coloredBlock "background-color")
- ] ++ map ignoreBlocks
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks
- [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
- -- newcommand, etc. should be parsed by macro, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
- , "bibliographystyle"
- , "maketitle", "makeindex", "makeglossary"
- , "addcontentsline", "addtocontents", "addtocounter"
- -- \ignore{} is used conventionally in literate haskell for definitions
- -- that are to be processed by the compiler but not printed.
- , "ignore"
- , "hyperdef"
- , "markboth", "markright", "markleft"
- , "hspace", "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-coloredBlock :: PandocMonad m => String -> LP m Blocks
-coloredBlock stylename = do
- skipopts
- color <- braced
- let constructor = divWith ("",[],[("style",stylename ++ ": " ++ color)])
- inlineContents <|> constructor <$> blockContents
- where inlineContents = do
- ils <- grouped inline
- rest <- inlines
- return (para (ils <> rest))
- blockContents = grouped block
-
-graphicsPath :: PandocMonad m => LP m Blocks
-graphicsPath = do
- ps <- bgroup *> (manyTill braced egroup)
- getResourcePath >>= setResourcePath . (++ ps)
- return mempty
-
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
-addMeta field val = updateState $ \st ->
- st{ stateMeta = addMetaField field val $ stateMeta st }
-
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-
-setCaption :: PandocMonad m => LP m Blocks
-setCaption = do
- ils <- tok
- mblabel <- option Nothing $
- try $ spaces' >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("data-label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ stateCaption = Just ils' }
- return mempty
-
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
-
-authors :: PandocMonad m => LP m ()
-authors = try $ do
- bgroup
- let oneAuthor = mconcat <$>
- many1 (notFollowedBy' (controlSeq "and") >>
- (inline <|> mempty <$ blockCommand))
- -- skip e.g. \vspace{10pt}
- auths <- sepBy oneAuthor (controlSeq "and")
- egroup
- addMeta "author" (map trimInlines auths)
-
-section :: PandocMonad m => Attr -> Int -> LP m Blocks
-section (ident, classes, kvs) lvl = do
- skipopts
- contents <- grouped inline
- lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
- attr' <- registerHeader (lab, classes, kvs) contents
- return $ headerWith attr' lvl contents
-
-inlineCommand :: PandocMonad m => LP m Inlines
-inlineCommand = try $ do
- (name, raw') <- withRaw anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*")
- let name' = name ++ star
- let raw = do
- guard $ not (isBlockCommand name)
- rawargs <- withRaw
- (skipangles *> skipopts *> option "" dimenarg *> many braced)
- let rawcommand = raw' ++ star ++ snd rawargs
- transformed <- applyMacros' rawcommand
- exts <- getOption readerExtensions
- if transformed /= rawcommand
- then parseFromString' inlines transformed
- else if extensionEnabled Ext_raw_tex exts
- then return $ rawInline "latex" rawcommand
- else ignore rawcommand
- (lookupListDefault raw [name',name] inlineCommands <*
- optional (try (string "{}")))
-
-rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
- parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
- if parseRaw
- then rawInline "latex" <$> getRawCommand name'
- else fallback
-
-isBlockCommand :: String -> Bool
-isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
-
-
-inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
-inlineEnvironments = M.fromList
- [ ("displaymath", mathEnvWith id Nothing "displaymath")
- , ("math", math <$> mathEnv "math")
- , ("equation", mathEnvWith id Nothing "equation")
- , ("equation*", mathEnvWith id Nothing "equation*")
- , ("gather", mathEnvWith id (Just "gathered") "gather")
- , ("gather*", mathEnvWith id (Just "gathered") "gather*")
- , ("multline", mathEnvWith id (Just "gathered") "multline")
- , ("multline*", mathEnvWith id (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith id (Just "aligned") "align")
- , ("align*", mathEnvWith id (Just "aligned") "align*")
- , ("alignat", mathEnvWith id (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
- ]
-
-inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
-inlineCommands = M.fromList $
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
- , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
- , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
- , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
- , ("texttt", ttfamily)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("ldots", lit "…")
- , ("vdots", lit "\8942")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("label", rawInlineOr "label" (inBrackets <$> tok))
- , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
- , ("textgreek", tok)
- , ("sep", lit ",")
- , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
- , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
- , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", try $ tok >>= accent hungarumlaut)
- , ("`", option (str "`") $ try $ tok >>= accent grave)
- , ("'", option (str "'") $ try $ tok >>= accent acute)
- , ("^", option (str "^") $ try $ tok >>= accent circ)
- , ("~", option (str "~") $ try $ tok >>= accent tilde)
- , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
- , (".", option (str ".") $ try $ tok >>= accent dot)
- , ("=", option (str "=") $ try $ tok >>= accent macron)
- , ("c", option (str "c") $ try $ tok >>= accent cedilla)
- , ("v", option (str "v") $ try $ tok >>= accent hacek)
- , ("u", option (str "u") $ try $ tok >>= accent breve)
- , ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
- , (",", lit "\8198")
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", note <$> grouped block)
- , ("footnote", note <$> grouped block)
- , ("verb", doverb)
- , ("lstinline", dolstinline)
- , ("Verb", doverb)
- , ("url", (unescapeURL <$> braced) >>= \url ->
- pure (link url "" (str url)))
- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
- tok >>= \lab ->
- pure (link url "" lab))
- , ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . removeDoubleQuotes <$> braced
- mkImage options src)
- , ("enquote", enquote)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- , ("hypertarget", braced >> tok)
- -- siuntix
- , ("SI", dosiunitx)
- -- hyphenat
- , ("bshyp", lit "\\\173")
- , ("fshyp", lit "/\173")
- , ("dothyp", lit ".\173")
- , ("colonhyp", lit ":\173")
- , ("hyp", lit "-")
- , ("nohyphens", tok)
- , ("textnhtt", ttfamily)
- , ("nhttfamily", ttfamily)
- -- LaTeX colors
- , ("textcolor", coloredInline "color")
- , ("colorbox", coloredInline "background-color")
- -- fontawesome
- , ("faCheck", lit "\10003")
- , ("faClose", lit "\10007")
- ] ++ map ignoreInlines
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks:
- [ "index"
- , "hspace"
- , "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-coloredInline :: PandocMonad m => String -> LP m Inlines
-coloredInline stylename = do
- skipopts
- color <- braced
- spanWith ("",[],[("style",stylename ++ ": " ++ color)]) <$> tok
-
-ttfamily :: PandocMonad m => LP m Inlines
-ttfamily = (code . stringify . toList) <$> tok
+doLHSverb :: PandocMonad m => LP m Inlines
+doLHSverb =
+ (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
- let replaceTextwidth (k,v) = case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
- _ -> (k, v)
- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let replaceTextwidth (k,v) =
+ case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth
+ $ filter (\(k,_) -> k `elem` ["width", "height"]) options
let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
@@ -737,56 +607,131 @@ mkImage options src = do
return $ imageWith attr (addExtension src defaultExt) "" alt
_ -> return $ imageWith attr src "" alt
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
+-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
+dosiunitx :: PandocMonad m => LP m Inlines
+dosiunitx = do
+ skipopts
+ value <- tok
+ valueprefix <- option "" $ bracketed tok
+ unit <- tok
+ let emptyOr160 "" = ""
+ emptyOr160 _ = "\160"
+ return . mconcat $ [valueprefix,
+ emptyOr160 valueprefix,
+ value,
+ emptyOr160 unit,
+ unit]
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
+lit :: String -> LP m Inlines
+lit = pure . str
+
+removeDoubleQuotes :: Text -> Text
+removeDoubleQuotes t =
+ maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+
+doubleQuote :: PandocMonad m => LP m Inlines
+doubleQuote = do
+ quoted' doubleQuoted (try $ count 2 $ symbol '`')
+ (void $ try $ count 2 $ symbol '\'')
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`'])
+ (void $ try $ sequence [symbol '"', symbol '\''])
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '"')
+ (void $ symbol '"')
+
+singleQuote :: PandocMonad m => LP m Inlines
+singleQuote = do
+ quoted' singleQuoted ((:[]) <$> symbol '`')
+ (try $ symbol '\'' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ <|> quoted' singleQuoted ((:[]) <$> symbol '‘')
+ (try $ symbol '’' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ where startsWithLetter (Tok _ Word t) =
+ case T.uncons t of
+ Just (c, _) | isLetter c -> True
+ _ -> False
+ startsWithLetter _ = False
+
+quoted' :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> LP m [Tok]
+ -> LP m ()
+ -> LP m Inlines
+quoted' f starter ender = do
+ startchs <- (T.unpack . untokenize) <$> starter
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then do
+ ils <- many (notFollowedBy ender >> inline)
+ (ender >> return (f (mconcat ils))) <|>
+ (<> mconcat ils) <$>
+ lit (case startchs of
+ "``" -> "“"
+ "`" -> "‘"
+ cs -> cs)
+ else lit startchs
enquote :: PandocMonad m => LP m Inlines
enquote = do
skipopts
- context <- stateQuoteContext <$> getState
- if context == InDoubleQuote
+ quoteContext <- sQuoteContext <$> getState
+ if quoteContext == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
doverb :: PandocMonad m => LP m Inlines
doverb = do
- marker <- anyChar
- code <$> manyTill (satisfy (/='\n')) (char marker)
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ (code . T.unpack . untokenize) <$>
+ manyTill (verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok)
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar)
+ : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp
+ return $ Tok (lin, col) toktype t1
dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
options <- option [] keyvals
let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
- marker <- char '{' <|> anyChar
- codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker)
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ (codeWith ("",classes,[]) . T.unpack . untokenize) <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
-doLHSverb :: PandocMonad m => LP m Inlines
-doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
-
--- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
-dosiunitx :: PandocMonad m => LP m Inlines
-dosiunitx = do
- skipopts
- value <- tok
- valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']'))
- unit <- tok
- let emptyOr160 "" = ""
- emptyOr160 _ = "\160"
- return . mconcat $ [valueprefix,
- emptyOr160 valueprefix,
- value,
- emptyOr160 unit,
- unit]
+keyval :: PandocMonad m => LP m (String, String)
+keyval = try $ do
+ Tok _ Word key <- satisfyTok isWordTok
+ let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"]
+ isSpecSym _ = False
+ val <- option [] $ do
+ symbol '='
+ braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
+ <|> anyControlSeq))
+ optional sp
+ optional (symbol ',')
+ optional sp
+ return (T.unpack key, T.unpack . untokenize $ val)
-lit :: String -> LP m Inlines
-lit = pure . str
+keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
@@ -994,18 +939,149 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+mathDisplay :: String -> Inlines
+mathDisplay = displayMath . trim
+
+mathInline :: String -> Inlines
+mathInline = math . trim
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ contents <- trim . toksToString <$>
+ many (notFollowedBy (symbol '$') >> anyTok)
+ if display
+ then do
+ mathDisplay contents <$ try (symbol '$' >> symbol '$')
+ <|> (guard (null contents) >> return (mathInline ""))
+ else mathInline contents <$ (symbol '$')
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: PandocMonad m => LP m [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> (manyTill citationLabel egroup)
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+citationLabel :: PandocMonad m => LP m String
+citationLabel = do
+ optional sp
+ toksToString <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* optional sp
+ <* optional (symbol ',')
+ <* optional sp)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char]
+
+cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+
+citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw))
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
+complexNatbibCitation mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` (symbol ';')
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
+inlineCommand' :: PandocMonad m => LP m Inlines
+inlineCommand' = try $ do
+ Tok _ (CtrlSeq name) cmd <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name] -- check non-starred as fallback
+ let raw = do
+ guard $ isInlineCommand name || not (isBlockCommand name)
+ rawcommand <- getRawCommand (cmd <> star)
+ (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand))
+ <|> ignore rawcommand
+ lookupListDefault raw names inlineCommands
+
tok :: PandocMonad m => LP m Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
+tok = grouped inline <|> inlineCommand' <|> singleChar
+ where singleChar = try $ do
+ Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ (Tok (lin, col + 1) toktype t2) : inp
+ return $ str (T.unpack t1)
+ else return $ str (T.unpack t)
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
-rawopt :: PandocMonad m => LP m String
+rawopt :: PandocMonad m => LP m Text
rawopt = do
- contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
- try (string "\\[") <|> rawopt)
+ symbol '['
+ inner <- untokenize <$> manyTill anyTok (symbol ']')
optional sp
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> inner <> "]"
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
@@ -1013,58 +1089,719 @@ skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
- char '<'
- skipMany (noneOf ">")
- char '>'
- return ()
+ symbol '<'
+ () <$ manyTill anyTok (symbol '>')
skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
-inlineText :: PandocMonad m => LP m Inlines
-inlineText = str <$> many1 inlineChar
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (0,0) Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
+ "\\end{" ++ T.unpack y ++ "}"
+
+mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ T.unpack $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ ]
+
+inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineCommands = M.fromList $
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+ , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+ , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+ , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+ , ("texttt", ttfamily)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", rawInlineOr "label" (inBrackets <$> tok))
+ , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
+ , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", try $ tok >>= accent hungarumlaut)
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent circ)
+ , ("~", option (str "~") $ try $ tok >>= accent tilde)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , (".", option (str ".") $ try $ tok >>= accent dot)
+ , ("=", option (str "=") $ try $ tok >>= accent macron)
+ , ("c", option (str "c") $ try $ tok >>= accent cedilla)
+ , ("v", option (str "v") $ try $ tok >>= accent hacek)
+ , ("u", option (str "u") $ try $ tok >>= accent breve)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional (bracketed inline)
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", note <$> grouped block)
+ , ("footnote", note <$> grouped block)
+ , ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("Verb", doverb)
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ pure (link url "" (str url)))
+ , ("href", (unescapeURL . toksToString <$>
+ braced <* optional sp) >>= \url ->
+ tok >>= \lab -> pure (link url "" lab))
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL . T.unpack .
+ removeDoubleQuotes . untokenize <$> braced
+ mkImage options src)
+ , ("enquote", enquote)
+ , ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ , ("hypertarget", braced >> tok)
+ -- siuntix
+ , ("SI", dosiunitx)
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ , ("nohyphens", tok)
+ , ("textnhtt", ttfamily)
+ , ("nhttfamily", ttfamily)
+ -- LaTeX colors
+ , ("textcolor", coloredInline "color")
+ , ("colorbox", coloredInline "background-color")
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ ]
+
+coloredInline :: PandocMonad m => String -> LP m Inlines
+coloredInline stylename = do
+ skipopts
+ color <- braced
+ spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok
+
+ttfamily :: PandocMonad m => LP m Inlines
+ttfamily = (code . stringify . toList) <$> tok
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name'
+ else fallback
+
+getRawCommand :: PandocMonad m => Text -> LP m String
+getRawCommand txt = do
+ (_, rawargs) <- withRaw
+ ((if txt == "\\write"
+ then () <$ satisfyTok isWordTok -- digits
+ else return ()) *>
+ skipangles *>
+ skipopts *>
+ option "" (try (optional sp *> dimenarg)) *>
+ many braced)
+ return $ T.unpack (txt <> untokenize rawargs)
+
+isBlockCommand :: Text -> Bool
+isBlockCommand s =
+ s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks))
+ || s `Set.member` treatAsBlock
+
+treatAsBlock :: Set.Set Text
+treatAsBlock = Set.fromList
+ [ "newcommand", "renewcommand"
+ , "newenvironment", "renewenvironment"
+ , "providecommand", "provideenvironment"
+ -- newcommand, etc. should be parsed by macroDef, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "markboth", "markright", "markleft"
+ , "hspace", "vspace"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+isInlineCommand :: Text -> Bool
+isInlineCommand s =
+ s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines))
+ || s `Set.member` treatAsInline
+
+treatAsInline :: Set.Set Text
+treatAsInline = Set.fromList
+ [ "index"
+ , "hspace"
+ , "vspace"
+ , "noindent"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where lookupList l m = msum $ map (`M.lookup` m) l
+
+inline :: PandocMonad m => LP m Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ whitespace)
+ <|> (softbreak <$ endline)
+ <|> word
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ <|> (symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-')))
+ <|> doubleQuote
+ <|> singleQuote
+ <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
+ <|> (str "”" <$ symbol '”')
+ <|> (str "’" <$ symbol '\'')
+ <|> (str "’" <$ symbol '’')
+ <|> (str "\160" <$ symbol '~')
+ <|> dollarsMath
+ <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
+ <|> (str . (:[]) <$> primEscape)
+ <|> regularSymbol
+ <|> (do res <- symbolIn "#^'`\"[]"
+ pos <- getPosition
+ let s = T.unpack (untoken res)
+ report $ ParsingUnescaped s pos
+ return $ str s)
+
+inlines :: PandocMonad m => LP m Inlines
+inlines = mconcat <$> many inline
+
+-- block elements:
+
+begin_ :: PandocMonad m => Text -> LP m ()
+begin_ t = (try $ do
+ controlSeq "begin"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}")
+
+end_ :: PandocMonad m => Text -> LP m ()
+end_ t = (try $ do
+ controlSeq "end"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}")
+
+preamble :: PandocMonad m => LP m Blocks
+preamble = mempty <$ many preambleBlock
+ where preambleBlock = spaces1
+ <|> void include
+ <|> void macroDef
+ <|> void blockCommand
+ <|> void braced
+ <|> (notFollowedBy (begin_ "document") >> void anyTok)
-inlineChar :: PandocMonad m => LP m Char
-inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
+paragraph :: PandocMonad m => LP m Blocks
+paragraph = do
+ x <- trimInlines . mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para x
+
+include :: PandocMonad m => LP m Blocks
+include = do
+ (Tok _ (CtrlSeq name) _) <-
+ controlSeq "include" <|> controlSeq "input" <|>
+ controlSeq "subfile" <|> controlSeq "usepackage"
+ skipMany $ bracketed inline -- skip options
+ fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced
+ let fs' = if name == "usepackage"
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs'
+
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
+
+authors :: PandocMonad m => LP m ()
+authors = try $ do
+ bgroup
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >>
+ (inline <|> mempty <$ blockCommand))
+ -- skip e.g. \vspace{10pt}
+ auths <- sepBy oneAuthor (controlSeq "and")
+ egroup
+ addMeta "author" (map trimInlines auths)
+
+macroDef :: PandocMonad m => LP m Blocks
+macroDef = do
+ guardEnabled Ext_latex_macros
+ mempty <$ ((commandDef <|> environmentDef) <* doMacros 0)
+ where commandDef = do
+ (name, macro') <- newcommand
+ updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) }
+ environmentDef = do
+ (name, macro1, macro2) <- newenvironment
+ updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand"
+ optional $ symbol '*'
+ Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents <- braced
+ when (mtype == "newcommand") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ optional $ symbol '*'
+ symbol '{'
+ spaces
+ Tok _ Word name <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ startcontents <- braced
+ spaces
+ endcontents <- braced
+ when (mtype == "newenvironment") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg startcontents,
+ Macro 0 Nothing endcontents)
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ manyTill anyTok (symbol ']')
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead (T.unpack ds) of
+ Just i -> return i
+ _ -> return 0
+
+setCaption :: PandocMonad m => LP m Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ sCaption = Just ils' }
+ return mempty
+
+looseItem :: PandocMonad m => LP m Blocks
+looseItem = do
+ inListItem <- sInListItem <$> getState
+ guard $ not inListItem
+ skipopts
+ return mempty
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
+ skipopts
+ contents <- grouped inline
+ lab <- option ident $
+ try (spaces >> controlSeq "label"
+ >> spaces >> toksToString <$> braced)
+ attr' <- registerHeader (lab, classes, kvs) contents
+ return $ headerWith attr' lvl contents
+
+blockCommand :: PandocMonad m => LP m Blocks
+blockCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name]
+ let raw = do
+ guard $ isBlockCommand name || not (isInlineCommand name)
+ rawBlock "latex" <$> getRawCommand (txt <> star)
+ lookupListDefault raw names blockCommands
+
+closing :: PandocMonad m => LP m Blocks
+closing = do
+ contents <- tok
+ st <- getState
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (sMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
+ return $ para (trimInlines contents) <> sigs
+
+blockCommands :: PandocMonad m => M.Map Text (LP m Blocks)
+blockCommands = M.fromList $
+ [ ("par", mempty <$ skipopts)
+ , ("parbox", braced >> grouped blocks)
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
+ -- Koma-script metadata commands
+ , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
+ -- sectioning
+ , ("part", section nullAttr (-1))
+ , ("part*", section nullAttr (-1))
+ , ("chapter", section nullAttr 0)
+ , ("chapter*", section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
+ -- beamer slides
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ --
+ , ("hrule", pure horizontalRule)
+ , ("strut", pure mempty)
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("item", looseItem)
+ , ("documentclass", skipopts *> braced *> preamble)
+ , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+ , ("caption", skipopts *> setCaption)
+ , ("bibliography", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ , ("addbibresource", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ -- includes
+ , ("lstinputlisting", inputListing)
+ , ("graphicspath", graphicsPath)
+ -- hyperlink
+ , ("hypertarget", try $ braced >> grouped block)
+ -- LaTeX colors
+ , ("textcolor", coloredBlock "color")
+ , ("colorbox", coloredBlock "background-color")
+ ]
+
+
+environments :: PandocMonad m => M.Map Text (LP m Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
+ , ("minipage", env "minipage" $
+ skipopts *> spaces *> optional braced *> spaces *> blocks)
+ , ("figure", env "figure" $ skipopts *> figure)
+ , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
+ , ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> simpTable "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular" $ simpTable "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable "tabular" False)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", orderedList')
+ , ("alltt", alltt <$> env "alltt" blocks)
+ , ("code", guardEnabled Ext_literate_haskell *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
+ , ("Verbatim", fancyverbEnv "Verbatim")
+ , ("BVerbatim", fancyverbEnv "BVerbatim")
+ , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
+ codeBlockWith attr <$> verbEnv "lstlisting")
+ , ("minted", minted)
+ , ("obeylines", obeylines)
+ , ("displaymath", mathEnvWith para Nothing "displaymath")
+ , ("equation", mathEnvWith para Nothing "equation")
+ , ("equation*", mathEnvWith para Nothing "equation*")
+ , ("gather", mathEnvWith para (Just "gathered") "gather")
+ , ("gather*", mathEnvWith para (Just "gathered") "gather*")
+ , ("multline", mathEnvWith para (Just "gathered") "multline")
+ , ("multline*", mathEnvWith para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith para (Just "aligned") "align")
+ , ("align*", mathEnvWith para (Just "aligned") "align*")
+ , ("alignat", mathEnvWith para (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
+ , ("tikzpicture", rawVerbEnv "tikzpicture")
+ ]
environment :: PandocMonad m => LP m Blocks
environment = do
controlSeq "begin"
- name <- braced
+ name <- untokenize <$> braced
M.findWithDefault mzero name environments
<|> rawEnv name
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name inlineEnvironments
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
-rawEnv :: PandocMonad m => String -> LP m Blocks
+rawEnv :: PandocMonad m => Text -> LP m Blocks
rawEnv name = do
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
rawOptions <- mconcat <$> many rawopt
- let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
+ let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
pos1 <- getPosition
(bs, raw) <- withRaw $ env name blocks
- raw' <- applyMacros' $ beginCommand ++ raw
- if raw' /= beginCommand ++ raw
- then parseFromString' blocks raw'
- else if parseRaw
- then return $ rawBlock "latex" $ beginCommand ++ raw'
- else do
- unless parseRaw $ do
- report $ SkippedContent beginCommand pos1
- pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
- return bs
-
-rawVerbEnv :: PandocMonad m => String -> LP m Blocks
+ if parseRaw
+ then return $ rawBlock "latex"
+ $ T.unpack $ beginCommand <> untokenize raw
+ else do
+ unless parseRaw $ do
+ report $ SkippedContent (T.unpack beginCommand) pos1
+ pos2 <- getPosition
+ report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ return bs
+
+rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{tikzpicture}" ++ raw
+ let raw' = "\\begin{tikzpicture}" ++ toksToString raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -1073,36 +1810,118 @@ rawVerbEnv name = do
report $ SkippedContent raw' pos
return mempty
-----
+verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv name = withVerbatimMode $ do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ toksToString res
-maybeAddExtension :: String -> FilePath -> FilePath
-maybeAddExtension ext fp =
- if null (takeExtension fp)
- then addExtension fp ext
- else fp
+fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
+fancyverbEnv name = do
+ options <- option [] keyvals
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv name
-include :: PandocMonad m => LP m Blocks
-include = do
- fs' <- try $ do
- char '\\'
- name <- try (string "include")
- <|> try (string "input")
- <|> try (string "subfile")
- <|> string "usepackage"
- -- skip options
- skipMany $ try $ char '[' *> manyTill anyChar (char ']')
- fs <- (map trim . splitBy (==',')) <$> braced
- return $ if name == "usepackage"
- then map (maybeAddExtension ".sty") fs
- else map (maybeAddExtension ".tex") fs
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mconcat <$> mapM (insertIncludedFile blocks dirs) fs'
+obeylines :: PandocMonad m => LP m Blocks
+obeylines = do
+ para . fromList . removeLeadingTrailingBreaks .
+ walk softBreakToHard . toList <$> env "obeylines" inlines
+ where softBreakToHard SoftBreak = LineBreak
+ softBreakToHard x = x
+ removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak .
+ reverse . dropWhile isLineBreak
+ isLineBreak LineBreak = True
+ isLineBreak _ = False
+
+minted :: PandocMonad m => LP m Blocks
+minted = do
+ options <- option [] keyvals
+ lang <- toksToString <$> braced
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ lang | not (null lang) ] ++
+ [ "numberLines" |
+ lookup "linenos" options == Just "true" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv "minted"
+
+letterContents :: PandocMonad m => LP m Blocks
+letterContents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case lookupMeta "address" (sMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
+ return $ addr <> bs -- sig added by \closing
+
+figure :: PandocMonad m => LP m Blocks
+figure = try $ do
+ resetCaption
+ blocks >>= addImageCaption
+
+addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
+addImageCaption = walkM go
+ where go (Image attr alt (src,tit))
+ | not ("fig:" `isPrefixOf` tit) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
+ Nothing -> Image attr alt (src,tit)
+ go x = return x
+
+coloredBlock :: PandocMonad m => String -> LP m Blocks
+coloredBlock stylename = do
+ skipopts
+ color <- braced
+ let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)])
+ inlineContents <|> constructor <$> blockContents
+ where inlineContents = do
+ ils <- grouped inline
+ rest <- inlines
+ return (para (ils <> rest))
+ blockContents = grouped block
+
+graphicsPath :: PandocMonad m => LP m Blocks
+graphicsPath = do
+ ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ getResourcePath >>= setResourcePath . (++ ps)
+ return mempty
+
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+
+alltt :: Blocks -> Blocks
+alltt = walk strToCode
+ where strToCode (Str s) = Code nullAttr s
+ strToCode Space = RawInline (Format "latex") "\\ "
+ strToCode SoftBreak = LineBreak
+ strToCode x = x
+
+parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions options =
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ ++ maybeToList (lookup "language" options
+ >>= fromListingsLanguage)
+ in (fromMaybe "" (lookup "label" options), classes, kvs)
inputListing :: PandocMonad m => LP m Blocks
inputListing = do
pos <- getPosition
options <- option [] keyvals
- f <- filter (/='"') <$> braced
+ f <- filter (/='"') . toksToString <$> braced
dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
mbCode <- readFileFromDirs dirs f
codeLines <- case mbCode of
@@ -1121,169 +1940,10 @@ inputListing = do
drop (firstline - 1) codeLines
return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
-parseListingsOptions :: [(String, String)] -> Attr
-parseListingsOptions options =
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- ++ maybeToList (lookup "language" options
- >>= fromListingsLanguage)
- in (fromMaybe "" (lookup "label" options), classes, kvs)
-
-----
-
-keyval :: PandocMonad m => LP m (String, String)
-keyval = try $ do
- key <- many1 alphaNum
- val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\"))
- skipMany spaceChar
- optional (char ',')
- skipMany spaceChar
- return (key, val)
-
-
-keyvals :: PandocMonad m => LP m [(String, String)]
-keyvals = try $ char '[' *> manyTill keyval (char ']')
-
-alltt :: PandocMonad m => String -> LP m Blocks
-alltt t = walk strToCode <$> parseFromString' blocks
- (substitute " " "\\ " $ substitute "%" "\\%" $
- intercalate "\\\\\n" $ lines t)
- where strToCode (Str s) = Code nullAttr s
- strToCode x = x
-
-rawLaTeXBlock :: PandocMonad m => LP m String
-rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-
-rawLaTeXInline :: PandocMonad m => LP m Inline
-rawLaTeXInline = do
- raw <- (snd <$> withRaw inlineCommand)
- <|> (snd <$> withRaw inlineEnvironment)
- <|> (snd <$> withRaw blockCommand)
- RawInline "latex" <$> applyMacros' raw
-
-addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
-addImageCaption = walkM go
- where go (Image attr alt (src,tit))
- | not ("fig:" `isPrefixOf` tit) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
- Nothing -> Image attr alt (src,tit)
- go x = return x
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table c als ws hs rs) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Table (toList ils) als ws hs rs
- Nothing -> Table c als ws hs rs
- go x = return x
-
-environments :: PandocMonad m => M.Map String (LP m Blocks)
-environments = M.fromList
- [ ("document", env "document" blocks <* skipMany anyChar)
- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
- , ("letter", env "letter" letterContents)
- , ("minipage", env "minipage" $
- skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- , ("figure", env "figure" $ skipopts *> figure)
- , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
- , ("center", env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable "longtable" False >>= addTableCaption)
- , ("table", env "table" $
- resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular" $ simpTable "tabular*" True)
- , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
- , ("tabular", env "tabular" $ simpTable "tabular" False)
- , ("quote", blockQuote <$> env "quote" blocks)
- , ("quotation", blockQuote <$> env "quotation" blocks)
- , ("verse", blockQuote <$> env "verse" blocks)
- , ("itemize", bulletList <$> listenv "itemize" (many item))
- , ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", orderedList')
- , ("alltt", alltt =<< verbEnv "alltt")
- , ("code", guardEnabled Ext_literate_haskell *>
- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- verbEnv "code"))
- , ("comment", mempty <$ verbEnv "comment")
- , ("verbatim", codeBlock <$> verbEnv "verbatim")
- , ("Verbatim", fancyverbEnv "Verbatim")
- , ("BVerbatim", fancyverbEnv "BVerbatim")
- , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
- codeBlockWith attr <$> verbEnv "lstlisting")
- , ("minted", do options <- option [] keyvals
- lang <- grouped (many1 $ satisfy (/='}'))
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
- [ "numberLines" |
- lookup "linenos" options == Just "true" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv "minted")
- , ("obeylines", parseFromString
- (para . trimInlines . mconcat <$> many inline) =<<
- intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnvWith para Nothing "displaymath")
- , ("equation", mathEnvWith para Nothing "equation")
- , ("equation*", mathEnvWith para Nothing "equation*")
- , ("gather", mathEnvWith para (Just "gathered") "gather")
- , ("gather*", mathEnvWith para (Just "gathered") "gather*")
- , ("multline", mathEnvWith para (Just "gathered") "multline")
- , ("multline*", mathEnvWith para (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith para (Just "aligned") "align")
- , ("align*", mathEnvWith para (Just "aligned") "align*")
- , ("alignat", mathEnvWith para (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
- , ("tikzpicture", rawVerbEnv "tikzpicture")
- ]
-
-figure :: PandocMonad m => LP m Blocks
-figure = try $ do
- resetCaption
- blocks >>= addImageCaption
-
-letterContents :: PandocMonad m => LP m Blocks
-letterContents = do
- bs <- blocks
- st <- getState
- -- add signature (author) and address (title)
- let addr = case lookupMeta "address" (stateMeta st) of
- Just (MetaBlocks [Plain xs]) ->
- para $ trimInlines $ fromList xs
- _ -> mempty
- return $ addr <> bs -- sig added by \closing
-
-closing :: PandocMonad m => LP m Blocks
-closing = do
- contents <- tok
- st <- getState
- let extractInlines (MetaBlocks [Plain ys]) = ys
- extractInlines (MetaBlocks [Para ys ]) = ys
- extractInlines _ = []
- let sigs = case lookupMeta "author" (stateMeta st) of
- Just (MetaList xs) ->
- para $ trimInlines $ fromList $
- intercalate [LineBreak] $ map extractInlines xs
- _ -> mempty
- return $ para (trimInlines contents) <> sigs
+-- lists
item :: PandocMonad m => LP m Blocks
-item = blocks *> controlSeq "item" *> skipopts *> blocks
-
-looseItem :: PandocMonad m => LP m Blocks
-looseItem = do
- ctx <- stateParserContext `fmap` getState
- if ctx == ListItemState
- then mzero
- else return mempty
+item = void blocks *> controlSeq "item" *> skipopts *> blocks
descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
@@ -1294,302 +1954,210 @@ descItem = do
bs <- blocks
return (ils, [bs])
-env :: PandocMonad m => String -> LP m a -> LP m a
-env name p = p <*
- (try (controlSeq "end" *> braced >>= guard . (== name))
- <?> ("\\end{" ++ name ++ "}"))
-
-listenv :: PandocMonad m => String -> LP m a -> LP m a
+listenv :: PandocMonad m => Text -> LP m a -> LP m a
listenv name p = try $ do
- oldCtx <- stateParserContext `fmap` getState
- updateState $ \st -> st{ stateParserContext = ListItemState }
+ oldInListItem <- sInListItem `fmap` getState
+ updateState $ \st -> st{ sInListItem = True }
res <- env name p
- updateState $ \st -> st{ stateParserContext = oldCtx }
+ updateState $ \st -> st{ sInListItem = oldInListItem }
return res
-mathEnvWith :: PandocMonad m
- => (Inlines -> a) -> Maybe String -> String -> LP m a
-mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name)
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
- "\\end{" ++ y ++ "}"
-
-mathEnv :: PandocMonad m => String -> LP m String
-mathEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = skipMany comment *>
- (many1 (noneOf "\\%") <|> try (string "\\%")
- <|> try (string "\\\\") <|> count 1 anyChar)
- res <- concat <$> manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-verbEnv :: PandocMonad m => String -> LP m String
-verbEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = anyChar
- res <- manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-fancyverbEnv :: PandocMonad m => String -> LP m Blocks
-fancyverbEnv name = do
- options <- option [] keyvals
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv name
-
orderedList' :: PandocMonad m => LP m Blocks
orderedList' = try $ do
- optional sp
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
+ let markerSpec = do
+ symbol '['
+ ts <- toksToString <$> manyTill anyTok (symbol ']')
+ case runParser anyOrderedListMarker def "option" ts of
+ Right r -> return r
+ Left _ -> do
+ pos <- getPosition
+ report $ SkippedContent ("[" ++ ts ++ "]") pos
+ return (1, DefaultStyle, DefaultDelim)
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
- start <- option 1 $ try $ do controlSeq "setcounter"
- grouped (string "enum" *> many1 (oneOf "iv"))
+ optional $ try $ controlSeq "setlength"
+ *> grouped (count 1 $ controlSeq "itemindent")
+ *> braced
+ spaces
+ start <- option 1 $ try $ do pos <- getPosition
+ controlSeq "setcounter"
+ ctr <- toksToString <$> braced
+ guard $ "enum" `isPrefixOf` ctr
+ guard $ all (`elem` ['i','v']) (drop 4 ctr)
optional sp
- num <- grouped (many1 digit)
- spaces
- return (read num + 1 :: Int)
+ num <- toksToString <$> braced
+ case safeRead num of
+ Just i -> return (i + 1 :: Int)
+ Nothing -> do
+ report $ SkippedContent
+ ("\\setcounter{" ++ ctr ++
+ "}{" ++ num ++ "}") pos
+ return 1
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
-paragraph :: PandocMonad m => LP m Blocks
-paragraph = do
- x <- trimInlines . mconcat <$> many1 inline
- if x == mempty
- then return mempty
- else return $ para x
-
-preamble :: PandocMonad m => LP m Blocks
-preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = void comment
- <|> void sp
- <|> void blanklines
- <|> void include
- <|> void macro
- <|> void blockCommand
- <|> void anyControlSeq
- <|> void braced
- <|> void anyChar
-
--------
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- keys <- try $ bgroup *> (manyTill citationLabel egroup)
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m String
-citationLabel = optional sp *>
- (many1 (satisfy isBibtexKeyChar)
- <* optional sp
- <* optional (char ',')
- <* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
+-- tables
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then many1 simpleCiteArgs
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional $ bracketed inline
+ return ()
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
- let ils = (toList . trimInlines . mconcat) <$>
- many (notFollowedBy (oneOf "\\};") >> inline)
- let parseOne = try $ do
- skipSpaces
- pref <- ils
- cit' <- inline -- expect a citation
- let citlist = toList cit'
- cits' <- case citlist of
- [Cite cs _] -> return cs
- _ -> mzero
- suff <- ils
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff cits'
- (c:cits, raw) <- withRaw $ grouped parseOne
- return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ raw)
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
--- tables
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) -> do
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ _ -> return ()
-parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))]
+parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
parseAligns = try $ do
- bgroup
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
- maybeBar
- let cAlign = AlignCenter <$ char 'c'
- let lAlign = AlignLeft <$ char 'l'
- let rAlign = AlignRight <$ char 'r'
- let parAlign = AlignLeft <$ char 'p'
- -- algins from tabularx
- let xAlign = AlignLeft <$ char 'X'
- let mAlign = AlignLeft <$ char 'm'
- let bAlign = AlignLeft <$ char 'b'
- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
- <|> xAlign <|> mAlign <|> bAlign
- let alignPrefix = char '>' >> braced
- let alignSuffix = char '<' >> braced
+ let maybeBar = skipMany $
+ sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced)
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
- char '{'
- ds <- many1 (oneOf "0123456789.")
+ symbol '{'
+ ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
spaces
- string "\\linewidth"
- char '}'
+ symbol '}'
case safeRead ds of
Just w -> return w
Nothing -> return 0.0
- let alignSpec = do
+ let alignSpec = try $ do
spaces
- pref <- option "" alignPrefix
+ pref <- option [] alignPrefix
spaces
al <- alignChar
- width <- colWidth <|> option 0.0 (do s <- braced
+ width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
pos <- getPosition
report $ SkippedContent s pos
return 0.0)
spaces
- suff <- option "" alignSuffix
+ suff <- option [] alignSuffix
return (al, width, (pref, suff))
- aligns' <- sepEndBy alignSpec maybeBar
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many (alignSpec <* maybeBar)
spaces
egroup
spaces
- return $ aligns'
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces'
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces'
- optional $ bracketed (many1 (satisfy (/=']')))
- return ()
-
-lbreak :: PandocMonad m => LP m ()
-lbreak = () <$ try (spaces' *>
- (controlSeq "\\" <|> controlSeq "tabularnewline") <*
- spaces')
-
-amp :: PandocMonad m => LP m ()
-amp = () <$ try (spaces' *> char '&' <* spaces')
+ return aligns'
parseTableRow :: PandocMonad m
- => String -- ^ table environment name
- -> [(String, String)] -- ^ pref/suffixes
+ => Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
-> LP m [Blocks]
-parseTableRow envname prefsufs = try $ do
+parseTableRow envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
let cols = length prefsufs
- let tableCellRaw = concat <$> many
- (do notFollowedBy amp
- notFollowedBy lbreak
- notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}"))
- many1 (noneOf "&%\n\r\\")
- <|> try (string "\\&")
- <|> count 1 anyChar)
- let plainify bs = case toList bs of
- [Para ils] -> plain (fromList ils)
- _ -> bs
- rawcells <- sepBy1 tableCellRaw amp
- guard $ length rawcells == cols
- let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs
- let tableCell = plainify <$> blocks
- cells' <- mapM (parseFromString' tableCell) rawcells'
- let numcells = length cells'
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- many (notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ >> anyTok)
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref
+ ++ contents ++
+ map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff
+ rawcells <- sequence (map celltoks prefsufs)
+ oldInput <- getInput
+ cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells
+ setInput oldInput
+ spaces
+ let numcells = length cells
guard $ numcells <= cols && numcells >= 1
- guard $ cells' /= [mempty]
+ guard $ cells /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
- let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces'
- return cells''
+ return $ cells ++ replicate (cols - numcells) mempty
-spaces' :: PandocMonad m => LP m ()
-spaces' = spaces *> skipMany (comment *> spaces)
+parseTableCell :: PandocMonad m => LP m Blocks
+parseTableCell = do
+ let plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+ updateState $ \st -> st{ sInTableCell = True }
+ cells <- plainify <$> blocks
+ updateState $ \st -> st{ sInTableCell = False }
+ return cells
-simpTable :: PandocMonad m => String -> Bool -> LP m Blocks
+simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
simpTable envname hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces' >> tok)
+ when hasWidthParameter $ () <$ (spaces >> tok)
skipopts
colspecs <- parseAligns
let (aligns, widths, prefsufs) = unzip3 colspecs
let cols = length colspecs
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
skipMany hline
- spaces'
+ spaces
header' <- option [] $ try (parseTableRow envname prefsufs <*
lbreak <* many1 hline)
- spaces'
+ spaces
rows <- sepEndBy (parseTableRow envname prefsufs)
(lbreak <* optional (skipMany hline))
- spaces'
+ spaces
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
let header'' = if null header'
then replicate cols mempty
else header'
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns widths) header'' rows
-removeDoubleQuotes :: String -> String
-removeDoubleQuotes ('"':xs) =
- case reverse xs of
- '"':ys -> reverse ys
- _ -> '"':xs
-removeDoubleQuotes xs = xs
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table c als ws hs rs) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
+ go x = return x
+
+
+block :: PandocMonad m => LP m Blocks
+block = (mempty <$ spaces1)
+ <|> environment
+ <|> include
+ <|> macroDef
+ <|> blockCommand
+ <|> paragraph
+ <|> grouped block
+
+blocks :: PandocMonad m => LP m Blocks
+blocks = mconcat <$> many block
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
new file mode 100644
index 000000000..6f84ae1f1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -0,0 +1,48 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Types
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Types for LaTeX tokens and macros.
+-}
+module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
+ , TokType(..)
+ , Macro(..)
+ , Line
+ , Column )
+where
+import Data.Text (Text)
+import Text.Parsec.Pos (Line, Column)
+
+data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
+ Esc1 | Esc2 | Arg Int
+ deriving (Eq, Ord, Show)
+
+data Tok = Tok (Line, Column) TokType Text
+ deriving (Eq, Ord, Show)
+
+data Macro = Macro Int (Maybe [Tok]) [Tok]
+ deriving Show
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e1c481311..ab6a32b78 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -52,15 +52,17 @@ import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..), report)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
-import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros,
+ macro)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
@@ -74,7 +76,7 @@ readMarkdown :: PandocMonad m
-> m Pandoc
readMarkdown opts s = do
parsed <- (readWithM parseMarkdown) def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -291,18 +293,22 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
-toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x)
- where
- toMeta p = do
- p' <- p
- return $
- case B.toList p' of
- [Plain xs] -> MetaInlines xs
- [Para xs]
- | endsWithNewline x -> MetaBlocks [Para xs]
- | otherwise -> MetaInlines xs
- bs -> MetaBlocks bs
- endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+toMetaValue x =
+ parseFromString' parser' (T.unpack x)
+ where parser' = (asInlines <$> ((trimInlinesF . mconcat)
+ <$> (guard (not endsWithNewline)
+ *> manyTill inline eof)))
+ <|> (asBlocks <$> parseBlocks)
+ asBlocks p = do
+ p' <- p
+ return $ MetaBlocks (B.toList p')
+ asInlines p = do
+ p' <- p
+ return $ MetaInlines (B.toList p')
+ endsWithNewline = T.pack "\n" `T.isSuffixOf` x
+ -- Note: a standard quoted or unquoted YAML value will
+ -- not end in a newline, but a "block" set off with
+ -- `|` or `>` will.
yamlToMeta :: PandocMonad m
=> Yaml.Value -> MarkdownParser m (F MetaValue)
@@ -368,13 +374,14 @@ parseMarkdown = do
-- lookup to get sourcepos
case M.lookup n (stateNotes' st) of
Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
- Nothing -> error "The impossible happened.") notesDefined
+ Nothing -> throwError $
+ PandocShouldNeverHappenError "note not found")
+ notesDefined
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
meta <- stateMeta' st
return $ Pandoc meta bs) st
reportLogMessages
- (do guardEnabled Ext_east_asian_line_breaks
- return $ eastAsianLineBreakFilter doc) <|> return doc
+ return doc
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
@@ -488,7 +495,6 @@ parseBlocks = mconcat <$> manyTill block eof
block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
- pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
@@ -514,8 +520,7 @@ block = do
, para
, plain
] <?> "block"
- report $ ParsingTrace
- (take 60 $ show $ B.toList $ runF res defaultParserState) pos
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
return res
--
@@ -680,19 +685,36 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
+rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute = do
+ char '{'
+ skipMany spaceChar
+ char '='
+ format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ skipMany spaceChar
+ char '}'
+ return format
+
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
- attr <- option ([],[],[]) $
- try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
blankline
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+ contents <- intercalate "\n" <$>
+ manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawBlock syn contents
+ Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
@@ -1013,7 +1035,8 @@ para = try $ do
result' <- result
case B.toList result' of
[Image attr alt (src,tit)]
- | Ext_implicit_figures `extensionEnabled` exts ->
+ | not (null alt) &&
+ Ext_implicit_figures `extensionEnabled` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
$ Image attr alt (src,'f':'i':'g':':':tit)
@@ -1083,10 +1106,11 @@ latexMacro = try $ do
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
+ result <- (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
+ <|> (B.rawBlock "latex" . concat <$>
+ rawLaTeXBlock `sepEndBy1` blankline)
+
spaces
return $ return result
@@ -1515,17 +1539,24 @@ code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ result <- (trim . concat) <$>
+ many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
- >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawInline syn result
+ Right attr -> B.codeWith attr result
math :: PandocMonad m => MarkdownParser m (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
+ <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
(guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
@@ -1849,9 +1880,8 @@ rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead (char '\\')
notFollowedBy' rawConTeXtEnvironment
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index a3ff60c14..a7f073d50 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -52,13 +52,14 @@ import qualified Data.Set as Set
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim)
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim,
+ crFilter)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
@@ -77,7 +78,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (unpack s ++ "\n")
+ (unpack (crFilter s) ++ "\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -205,7 +206,6 @@ parseMediaWiki = do
block :: PandocMonad m => MWParser m Blocks
block = do
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> table
<|> header
@@ -218,7 +218,7 @@ block = do
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
para :: PandocMonad m => MWParser m Blocks
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
new file mode 100644
index 000000000..1ae73c148
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -0,0 +1,607 @@
+{-
+ Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Muse
+ Copyright : Copyright (C) 2017 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of Muse text to 'Pandoc' document.
+-}
+{-
+TODO:
+- {{{ }}} syntax for <example>
+- Page breaks (five "*")
+- Headings with anchors (make it round trip with Muse writer)
+- <verse> and ">"
+- Definition lists
+- Org tables
+- table.el tables
+- Images with attributes (floating and width)
+- Anchors
+- Citations and <biblio>
+- <play> environment
+- <verbatim> tag
+-}
+module Text.Pandoc.Readers.Muse (readMuse) where
+
+import Control.Monad
+import Control.Monad.Except (throwError)
+import qualified Data.Map as M
+import Data.Text (Text, unpack)
+import Data.List (stripPrefix)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter)
+import Text.Pandoc.Parsing hiding (nested)
+import Text.Pandoc.Readers.HTML (htmlTag)
+import Text.Pandoc.XML (fromEntities)
+import System.FilePath (takeExtension)
+
+-- | Read Muse from an input string and return a Pandoc document.
+readMuse :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readMuse opts s = do
+ res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type MuseParser = ParserT String ParserState
+
+--
+-- main parser
+--
+
+parseMuse :: PandocMonad m => MuseParser m Pandoc
+parseMuse = do
+ many directive
+ blocks <- parseBlocks
+ st <- getState
+ let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+ meta <- stateMeta' st
+ return $ Pandoc meta bs) st
+ reportLogMessages
+ return doc
+
+parseBlocks :: PandocMonad m => MuseParser m (F Blocks)
+parseBlocks = do
+ res <- mconcat <$> many block
+ spaces
+ eof
+ return res
+
+--
+-- utility functions
+--
+
+nested :: PandocMonad m => MuseParser m a -> MuseParser m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlElement tag = try $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = void $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: PandocMonad m
+ => String -> MuseParser m a -> MuseParser m (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a]
+parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p)
+
+--
+-- directive parsers
+--
+
+parseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseDirective = do
+ char '#'
+ key <- many letter
+ space
+ spaces
+ raw <- many $ noneOf "\n"
+ newline
+ value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw
+ return (key, value)
+
+directive :: PandocMonad m => MuseParser m ()
+directive = do
+ (key, value) <- parseDirective
+ updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st }
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => MuseParser m (F Blocks)
+block = do
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
+ return res
+
+blockElements :: PandocMonad m => MuseParser m (F Blocks)
+blockElements = choice [ comment
+ , separator
+ , header
+ , exampleTag
+ , literal
+ , centerTag
+ , rightTag
+ , quoteTag
+ , bulletList
+ , orderedList
+ , table
+ , commentTag
+ , indentedBlock
+ , noteBlock
+ ]
+
+comment :: PandocMonad m => MuseParser m (F Blocks)
+comment = try $ do
+ char ';'
+ space
+ many $ noneOf "\n"
+ void newline <|> eof
+ return mempty
+
+separator :: PandocMonad m => MuseParser m (F Blocks)
+separator = try $ do
+ string "----"
+ many $ char '-'
+ many spaceChar
+ void newline <|> eof
+ return $ return B.horizontalRule
+
+header :: PandocMonad m => MuseParser m (F Blocks)
+header = try $ do
+ st <- stateParserContext <$> getState
+ q <- stateQuoteContext <$> getState
+ getPosition >>= \pos -> guard (st == NullState && q == NoQuote && sourceColumn pos == 1)
+ level <- liftM length $ many1 $ char '*'
+ guard $ level <= 5
+ skipSpaces
+ content <- trimInlinesF . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", [], []) (runF content defaultParserState)
+ return $ B.headerWith attr level <$> content
+
+exampleTag :: PandocMonad m => MuseParser m (F Blocks)
+exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example"
+
+literal :: PandocMonad m => MuseParser m (F Blocks)
+literal = liftM (return . rawBlock) $ htmlElement "literal"
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+blockTag :: PandocMonad m
+ => (Blocks -> Blocks)
+ -> String
+ -> MuseParser m (F Blocks)
+blockTag f s = do
+ res <- parseHtmlContent s block
+ return $ f <$> mconcat res
+
+-- <center> tag is ignored
+centerTag :: PandocMonad m => MuseParser m (F Blocks)
+centerTag = blockTag id "center"
+
+-- <right> tag is ignored
+rightTag :: PandocMonad m => MuseParser m (F Blocks)
+rightTag = blockTag id "right"
+
+quoteTag :: PandocMonad m => MuseParser m (F Blocks)
+quoteTag = blockTag B.blockQuote "quote"
+
+commentTag :: PandocMonad m => MuseParser m (F Blocks)
+commentTag = parseHtmlContent "comment" block >> return mempty
+
+-- Indented block is either center, right or quote
+indentedLine :: PandocMonad m => MuseParser m (Int, String)
+indentedLine = try $ do
+ indent <- length <$> many1 spaceChar
+ line <- anyLine
+ return (indent, line)
+
+rawIndentedBlock :: PandocMonad m => MuseParser m (Int, String)
+rawIndentedBlock = try $ do
+ lns <- many1 indentedLine
+ let indent = minimum $ map fst lns
+ return (indent, unlines $ map snd lns)
+
+indentedBlock :: PandocMonad m => MuseParser m (F Blocks)
+indentedBlock = try $ do
+ (indent, raw) <- rawIndentedBlock
+ contents <- withQuoteContext InDoubleQuote $ parseFromString parseBlocks raw
+ return $ (if indent >= 2 && indent < 6 then B.blockQuote else id) <$> contents
+
+para :: PandocMonad m => MuseParser m (F Blocks)
+para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> void blockElements
+
+noteMarker :: PandocMonad m => MuseParser m String
+noteMarker = try $ do
+ char '['
+ many1Till digit $ char ']'
+
+noteBlock :: PandocMonad m => MuseParser m (F Blocks)
+noteBlock = try $ do
+ pos <- getPosition
+ ref <- noteMarker <* skipSpaces
+ content <- mconcat <$> blocksTillNote
+ oldnotes <- stateNotes' <$> getState
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
+ Nothing -> return ()
+ updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes }
+ return mempty
+ where
+ blocksTillNote =
+ many1Till block (eof <|> () <$ lookAhead noteMarker)
+
+--
+-- lists
+--
+
+listLine :: PandocMonad m => Int -> MuseParser m String
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ anyLineNewline
+
+withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a
+withListContext p = do
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state { stateParserContext = ListItemState }
+ parsed <- p
+ updateState (\st -> st {stateParserContext = oldContext})
+ return parsed
+
+listContinuation :: PandocMonad m => Int -> MuseParser m String
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 $ listLine markerLength
+ return $ blanks ++ concat result
+
+listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int
+listStart marker = try $ do
+ preWhitespace <- length <$> many spaceChar
+ st <- stateParserContext <$> getState
+ getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1)
+ markerLength <- marker
+ postWhitespace <- length <$> many1 spaceChar
+ return $ preWhitespace + markerLength + postWhitespace
+
+listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks)
+listItem start = try $ do
+ markerLength <- start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ restLines <- many $ listLine markerLength
+ let first = firstLine ++ blank ++ concat restLines
+ rest <- many $ listContinuation markerLength
+ parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n"
+
+bulletListItems :: PandocMonad m => MuseParser m (F [Blocks])
+bulletListItems = sequence <$> many1 (listItem bulletListStart)
+
+bulletListStart :: PandocMonad m => MuseParser m Int
+bulletListStart = listStart (char '-' >> return 1)
+
+bulletList :: PandocMonad m => MuseParser m (F Blocks)
+bulletList = do
+ listItems <- bulletListItems
+ return $ B.bulletList <$> listItems
+
+orderedListStart :: PandocMonad m
+ => ListNumberStyle
+ -> ListNumberDelim
+ -> MuseParser m Int
+orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim))
+
+orderedList :: PandocMonad m => MuseParser m (F Blocks)
+orderedList = try $ do
+ p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar)
+ guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
+ guard $ delim == Period
+ items <- sequence <$> many1 (listItem $ orderedListStart style delim)
+ return $ B.orderedListWith p <$> items
+
+--
+-- tables
+--
+
+data MuseTable = MuseTable
+ { museTableCaption :: Inlines
+ , museTableHeaders :: [[Blocks]]
+ , museTableRows :: [[Blocks]]
+ , museTableFooters :: [[Blocks]]
+ }
+
+data MuseTableElement = MuseHeaderRow (F [Blocks])
+ | MuseBodyRow (F [Blocks])
+ | MuseFooterRow (F [Blocks])
+ | MuseCaption (F Inlines)
+
+museToPandocTable :: MuseTable -> Blocks
+museToPandocTable (MuseTable caption headers body footers) =
+ B.table caption attrs headRow rows
+ where ncol = maximum (0 : map length (headers ++ body ++ footers))
+ attrs = replicate ncol (AlignDefault, 0.0)
+ headRow = if null headers then [] else head headers
+ rows = (if null headers then [] else tail headers) ++ body ++ footers
+
+museAppendElement :: MuseTable
+ -> MuseTableElement
+ -> F MuseTable
+museAppendElement tbl element =
+ case element of
+ MuseHeaderRow row -> do
+ row' <- row
+ return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] }
+ MuseBodyRow row -> do
+ row' <- row
+ return tbl{ museTableRows = museTableRows tbl ++ [row'] }
+ MuseFooterRow row-> do
+ row' <- row
+ return tbl{ museTableFooters = museTableFooters tbl ++ [row'] }
+ MuseCaption inlines -> do
+ inlines' <- inlines
+ return tbl{ museTableCaption = inlines' }
+
+tableCell :: PandocMonad m => MuseParser m (F Blocks)
+tableCell = try $ do
+ content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
+ return $ B.plain <$> content
+ where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof
+
+tableElements :: PandocMonad m => MuseParser m [MuseTableElement]
+tableElements = tableParseElement `sepEndBy1` (void newline <|> eof)
+
+elementsToTable :: [MuseTableElement] -> F MuseTable
+elementsToTable = foldM museAppendElement emptyTable
+ where emptyTable = MuseTable mempty mempty mempty mempty
+
+table :: PandocMonad m => MuseParser m (F Blocks)
+table = try $ do
+ rows <- tableElements
+ let tbl = elementsToTable rows
+ let pandocTbl = museToPandocTable <$> tbl :: F Blocks
+ return pandocTbl
+
+tableParseElement :: PandocMonad m => MuseParser m MuseTableElement
+tableParseElement = tableParseHeader
+ <|> tableParseBody
+ <|> tableParseFooter
+ <|> tableParseCaption
+
+tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow n = try $ do
+ fields <- tableCell `sepBy2` fieldSep
+ return $ sequence fields
+ where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
+ fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+
+tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
+tableParseHeader = MuseHeaderRow <$> tableParseRow 2
+
+tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
+tableParseBody = MuseBodyRow <$> tableParseRow 1
+
+tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
+tableParseFooter = MuseFooterRow <$> tableParseRow 3
+
+tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
+tableParseCaption = try $ do
+ many spaceChar
+ string "|+"
+ contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|")
+ string "+|"
+ return $ MuseCaption contents
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => MuseParser m (F Inlines)
+inline = choice [ br
+ , footnote
+ , strong
+ , strongTag
+ , emph
+ , emphTag
+ , superscriptTag
+ , subscriptTag
+ , strikeoutTag
+ , link
+ , code
+ , codeTag
+ , whitespace
+ , str
+ , symbol
+ ] <?> "inline"
+
+footnote :: PandocMonad m => MuseParser m (F Inlines)
+footnote = try $ do
+ ref <- noteMarker
+ return $ do
+ notes <- asksF stateNotes'
+ case M.lookup ref notes of
+ Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Just (_pos, contents) -> do
+ st <- askF
+ let contents' = runF contents st { stateNotes' = M.empty }
+ return $ B.note contents'
+
+whitespace :: PandocMonad m => MuseParser m (F Inlines)
+whitespace = liftM return (lb <|> regsp)
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: PandocMonad m => MuseParser m (F Inlines)
+br = try $ do
+ string "<br>"
+ return $ return B.linebreak
+
+linebreak :: PandocMonad m => MuseParser m (F Inlines)
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = do
+ eof
+ return $ return mempty
+ innerNewline = return $ return B.space
+
+emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
+emphasisBetween c = try $ enclosedInlines c c
+
+enclosedInlines :: (PandocMonad m, Show a, Show b)
+ => MuseParser m a
+ -> MuseParser m b
+ -> MuseParser m (F Inlines)
+enclosedInlines start end = try $
+ trimInlinesF . mconcat <$> enclosed start end inline
+
+verbatimBetween :: PandocMonad m
+ => Char
+ -> MuseParser m String
+verbatimBetween c = try $ do
+ char c
+ many1Till anyChar $ char c
+
+inlineTag :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> String
+ -> MuseParser m (F Inlines)
+inlineTag f s = do
+ res <- parseHtmlContent s inline
+ return $ f <$> mconcat res
+
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = inlineTag B.strong "strong"
+
+strong :: PandocMonad m => MuseParser m (F Inlines)
+strong = fmap B.strong <$> emphasisBetween (string "**")
+
+emph :: PandocMonad m => MuseParser m (F Inlines)
+emph = fmap B.emph <$> emphasisBetween (char '*')
+
+emphTag :: PandocMonad m => MuseParser m (F Inlines)
+emphTag = inlineTag B.emph "em"
+
+superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+superscriptTag = inlineTag B.superscript "sup"
+
+subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+subscriptTag = inlineTag B.subscript "sub"
+
+strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
+strikeoutTag = inlineTag B.strikeout "del"
+
+code :: PandocMonad m => MuseParser m (F Inlines)
+code = try $ do
+ pos <- getPosition
+ sp <- if sourceColumn pos == 1
+ then pure mempty
+ else skipMany1 spaceChar >> pure B.space
+ cd <- verbatimBetween '='
+ notFollowedBy nonspaceChar
+ return $ return (sp B.<> B.code cd)
+
+codeTag :: PandocMonad m => MuseParser m (F Inlines)
+codeTag = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ return $ B.codeWith attrs $ fromEntities content
+
+str :: PandocMonad m => MuseParser m (F Inlines)
+str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => MuseParser m (F Inlines)
+symbol = liftM (return . B.str) $ count 1 nonspaceChar
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ case stripPrefix "URL:" url of
+ Nothing -> if isImageUrl url
+ then B.image url title <$> fromMaybe (return mempty) content
+ else B.link url title <$> fromMaybe (return $ B.str url) content
+ Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content
+ where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ isImageUrl = (`elem` imageExtensions) . takeExtension
+
+linkContent :: PandocMonad m => MuseParser m (F Inlines)
+linkContent = do
+ char '['
+ res <- many1Till anyChar $ char ']'
+ parseFromString (mconcat <$> many1 inline) res
+
+linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines))
+linkText = do
+ string "[["
+ url <- many1Till anyChar $ char ']'
+ content <- optionMaybe linkContent
+ char ']'
+ return (url, "", content)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 591d7590e..c25ace800 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (toUpper)
import Data.Text (Text, unpack, pack)
import Data.Default
@@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
@@ -32,7 +33,8 @@ instance Default OPMLState where
readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML _ inp = do
(bs, st') <- flip runStateT def
- (mapM parseBlock $ normalizeTree $ parseXML (unpack inp))
+ (mapM parseBlock $ normalizeTree $
+ parseXML (unpack (crFilter inp)))
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
index 4d6a67b8e..8c47cdaf5 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -124,8 +124,3 @@ instance ChoiceVector SuccessList where
spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
where unTagRight (Right x) = (x:)
unTagRight _ = id
-
--- | Like 'catMaybes', but for 'Either'.
-collectRights :: [Either _l r] -> [r]
-collectRights = collectNonFailing . untag . spreadChoice . SuccessList
- where untag = fromLeft (error "Unexpected Left")
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 1c3e08a7f..428048427 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -71,6 +71,7 @@ import Control.Applicative hiding ( liftA, liftA2 )
import Control.Monad ( MonadPlus )
import Control.Arrow
+import Data.Either ( rights )
import qualified Data.Map as M
import Data.Default
import Data.Maybe
@@ -604,7 +605,7 @@ tryAll :: (NameSpaceID nsID)
-> XMLConverter nsID extraState b [a]
tryAll nsID name a = prepareIteration nsID name
>>> iterateS (switchingTheStack a)
- >>^ collectRights
+ >>^ rights
--------------------------------------------------------------------------------
-- Matching children
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5e0d67d10..eaccc251c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -36,6 +36,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
+import Text.Pandoc.Shared (crFilter)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
@@ -51,7 +52,7 @@ readOrg :: PandocMonad m
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 66273e05d..42fdfd4dd 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just
inlineLaTeXCommand :: PandocMonad m => OrgParser m String
inlineLaTeXCommand = try $ do
rest <- getInput
- parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
+ st <- getState
+ parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest
case parsed of
- Right (RawInline _ cs) -> do
+ Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
let cmdNoSpc = dropWhileEnd isSpace cs
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 92f868516..fc98213fb 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, OrgNoteRecord
, HasReaderOptions (..)
, HasQuoteContext (..)
+ , HasMacros (..)
, TodoMarker (..)
, TodoSequence
, TodoState (..)
@@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local)
import Data.Default (Default (..))
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Definition (Meta (..), nullMeta)
import Text.Pandoc.Logging
import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..),
HasIncludeFiles (..), HasLastStrPosition (..),
HasLogMessages (..), HasQuoteContext (..),
+ HasMacros (..),
HasReaderOptions (..), ParserContext (..),
QuoteContext (..), SourcePos, askF, asksF, returnF,
runF, trimInlinesF)
@@ -118,6 +122,7 @@ data OrgParserState = OrgParserState
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
, orgLogMessages :: [LogMessage]
+ , orgMacros :: M.Map Text Macro
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where
addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
getLogMessages st = reverse $ orgLogMessages st
+instance HasMacros OrgParserState where
+ extractMacros st = orgMacros st
+ updateMacros f st = st{ orgMacros = f (orgMacros st) }
+
instance HasIncludeFiles OrgParserState where
getIncludeFiles = orgStateIncludeFiles
addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st }
@@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState
, orgStateParserContext = NullState
, orgStateTodoSequences = []
, orgLogMessages = []
+ , orgMacros = M.empty
}
optionsToParserState :: ReaderOptions -> OrgParserState
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index fb5f6f2d4..2daf60a89 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST ( readRST ) where
-import Control.Monad (guard, liftM, mzero, when)
+import Control.Monad (guard, liftM, mzero, when, forM_)
import Control.Monad.Identity (Identity(..))
import Control.Monad.Except (throwError)
import Data.Char (isHexDigit, isSpace, toLower, toUpper)
@@ -68,7 +68,7 @@ readRST :: PandocMonad m
-> m Pandoc
readRST opts s = do
parsed <- (readWithM parseRST) def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -170,7 +170,8 @@ parseRST = do
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- concat <$>
- manyTill (referenceKey <|> noteBlock <|> citationBlock <|>
+ manyTill (referenceKey <|> anchorDef <|>
+ noteBlock <|> citationBlock <|>
headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
@@ -217,6 +218,7 @@ block = choice [ codeBlock
, fieldList
, include
, directive
+ , anchor
, comment
, header
, hrule
@@ -1054,16 +1056,49 @@ stripTicks = reverse . stripTick . reverse . stripTick
where stripTick ('`':xs) = xs
stripTick xs = xs
+referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames = do
+ let rn = try $ do
+ string ".. _"
+ (_, ref) <- withRaw referenceName
+ char ':'
+ return ref
+ first <- rn
+ rest <- many (try (blanklines *> rn))
+ return (first:rest)
+
regularKey :: PandocMonad m => RSTParser m ()
regularKey = try $ do
- string ".. _"
- (_,ref) <- withRaw referenceName
- char ':'
+ -- we allow several references to the same URL, e.g.
+ -- .. _hello:
+ -- .. _goodbye: url.com
+ refs <- referenceNames
src <- targetURI
- let key = toKey $ stripTicks ref
+ guard $ not (null src)
--TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
- stateKeys s }
+ let keys = map (toKey . stripTicks) refs
+ forM_ keys $ \key ->
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef = try $ do
+ (refs, raw) <- withRaw (try (referenceNames <* blanklines))
+ let keys = map stripTicks refs
+ forM_ keys $ \rawkey ->
+ updateState $ \s -> s { stateKeys =
+ M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ -- keep this for 2nd round of parsing, where we'll add the divs (anchor)
+ return raw
+
+anchor :: PandocMonad m => RSTParser m Blocks
+anchor = try $ do
+ refs <- referenceNames
+ blanklines
+ b <- block
+ -- put identifier on next block:
+ let addDiv ref = B.divWith (ref, [], [])
+ return $ foldr addDiv b refs
headerBlock :: PandocMonad m => RSTParser m [Char]
headerBlock = do
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 9e544c4ac..d41152de5 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -42,13 +42,13 @@ import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Text.HTML.TagSoup
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
-import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.Shared (crFilter)
import Data.Text (Text)
import qualified Data.Text as T
@@ -59,7 +59,7 @@ readTWiki :: PandocMonad m
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case res of
Left e -> throwError e
Right d -> return d
@@ -133,12 +133,11 @@ parseTWiki = do
block :: PandocMonad m => TWParser m B.Blocks
block = do
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> blockElements
<|> para
skipMany blankline
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
blockElements :: PandocMonad m => TWParser m B.Blocks
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 1669e3e51..853d2768f 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -61,15 +61,14 @@ import Text.HTML.TagSoup (Tag (..), fromAttrib)
import Text.HTML.TagSoup.Match
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.CSS
import Text.Pandoc.Definition
-import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (trim)
+import Text.Pandoc.Shared (trim, crFilter)
import Data.Text (Text)
import qualified Data.Text as T
@@ -80,7 +79,7 @@ readTextile :: PandocMonad m
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -143,8 +142,7 @@ blockParsers = [ codeBlock
block :: PandocMonad m => ParserT [Char] ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
- pos <- getPosition
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
@@ -575,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- B.singleton <$> rawLaTeXInline
+ B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 260bb7fff..f000646c2 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -40,8 +40,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (macro, space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
+import Text.Pandoc.Parsing hiding (space, spaces, uri)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter)
import Control.Monad (guard, void, when)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Default
@@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
- let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n")
+ let parsed = flip runReader meta $
+ readWithM parseT2T (def {stateOptions = opts}) $
+ T.unpack (crFilter s) ++ "\n\n"
case parsed of
Right result -> return $ result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
new file mode 100644
index 000000000..52bf37d35
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -0,0 +1,673 @@
+{-
+ Copyright (C) 2017 Yuchen Pei <me@ypei.me>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Vimwiki
+ Copyright : Copyright (C) 2017 Yuchen Pei
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Yuchen Pei <me@ypei.me>
+ Stability : alpha
+ Portability : portable
+
+Conversion of vimwiki text to 'Pandoc' document.
+-}
+{--
+[X]: implemented
+[O]: not implemented
+* block parsers:
+ * [X] header
+ * [X] hrule
+ * [X] comment
+ * [X] blockquote
+ * [X] preformatted -- using codeblock
+ * [X] displaymath
+ * [X] bulletlist / orderedlist
+ * [X] todo lists -- using span.
+ * [X] table
+ * [X] centered table -- using div
+ * [O] colspan and rowspan -- see issue #1024
+ * [X] paragraph
+ * [X] definition list
+* inline parsers:
+ * [X] bareURL
+ * [X] strong
+ * [X] emph
+ * [X] strikeout
+ * [X] code
+ * [X] link
+ * [X] image
+ * [X] inline math
+ * [X] tag
+ * [X] sub- and super-scripts
+* misc:
+ * [X] `TODO:` mark
+ * [X] metadata placeholders: %title and %date
+ * [O] control placeholders: %template and %nohtml -- ignored
+--}
+
+module Text.Pandoc.Readers.Vimwiki ( readVimwiki
+ ) where
+import Control.Monad.Except (throwError)
+import Control.Monad (guard)
+import Data.Default
+import Data.Maybe
+import Data.Monoid ((<>))
+import Data.List (isInfixOf, isPrefixOf)
+import Data.Text (Text, unpack)
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList)
+import qualified Text.Pandoc.Builder
+ as B (headerWith, str, space, strong, emph, strikeout, code, link, image,
+ spanWith, para, horizontalRule, blockQuote, bulletList, plain,
+ orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith,
+ setMeta, definitionList, superscript, subscript, displayMath,
+ math)
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Definition (Pandoc(..), Inline(Space),
+ Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..),
+ ListNumberDelim(..))
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState,
+ stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF,
+ orderedListMarker, many1Till)
+import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter)
+import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf,
+ alphaNum)
+import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1,
+ notFollowedBy, option)
+import Text.Parsec.Prim (many, try, updateState, getState)
+import Text.Parsec.Char (oneOf, space)
+import Text.Parsec.Combinator (lookAhead, between)
+import Text.Parsec.Prim ((<|>))
+
+readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki opts s = do
+ res <- readWithM parseVimwiki def{ stateOptions = opts }
+ (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right result -> return result
+
+type VwParser = ParserT [Char] ParserState
+
+
+-- constants
+
+specialChars :: [Char]
+specialChars = "=*-#[]_~{}`$|:%^,"
+
+spaceChars :: [Char]
+spaceChars = " \t\n"
+
+-- main parser
+
+parseVimwiki :: PandocMonad m => VwParser m Pandoc
+parseVimwiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ st <- getState
+ let meta = runF (stateMeta' st) st
+ return $ Pandoc meta (toList bs)
+
+-- block parser
+
+block :: PandocMonad m => VwParser m Blocks
+block = do
+ res <- choice [ mempty <$ blanklines
+ , header
+ , hrule
+ , mempty <$ comment
+ , mixedList
+ , preformatted
+ , displayMath
+ , table
+ , mempty <$ placeholder
+ , blockQuote
+ , definitionList
+ , para
+ ]
+ trace (take 60 $ show $ toList res)
+ return res
+
+blockML :: PandocMonad m => VwParser m Blocks
+blockML = choice [preformatted, displayMath, table]
+
+header :: PandocMonad m => VwParser m Blocks
+header = try $ do
+ sp <- many spaceChar
+ eqs <- many1 (char '=')
+ spaceChar
+ let lev = length eqs
+ guard $ lev <= 6
+ contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar
+ >> (string eqs) >> many spaceChar >> newline)
+ attr <- registerHeader (makeId contents,
+ (if sp == "" then [] else ["justcenter"]), []) contents
+ return $ B.headerWith attr lev contents
+
+para :: PandocMonad m => VwParser m Blocks
+para = try $ do
+ contents <- trimInlines . mconcat <$> many1 inline
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.para contents
+
+hrule :: PandocMonad m => VwParser m Blocks
+hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline)
+
+comment :: PandocMonad m => VwParser m ()
+comment = try $ do
+ many spaceChar >> string "%%" >> many (noneOf "\n")
+ return ()
+
+blockQuote :: PandocMonad m => VwParser m Blocks
+blockQuote = try $ do
+ string " "
+ contents <- trimInlines . mconcat <$> many1 inlineBQ
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.blockQuote $ B.plain contents
+
+definitionList :: PandocMonad m => VwParser m Blocks
+definitionList = try $
+ B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT))
+
+dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithDT = do
+ dt <- definitionTerm
+ dds <- many definitionDef
+ return $ (dt, dds)
+
+dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithoutDT = do
+ dds <- many1 definitionDef
+ return $ (mempty, dds)
+
+definitionDef :: PandocMonad m => VwParser m Blocks
+definitionDef = try $
+ (notFollowedBy definitionTerm) >> many spaceChar
+ >> (definitionDef1 <|> definitionDef2)
+
+definitionDef1 :: PandocMonad m => VwParser m Blocks
+definitionDef1 = try $ mempty <$ defMarkerE
+
+definitionDef2 :: PandocMonad m => VwParser m Blocks
+definitionDef2 = try $ B.plain <$>
+ (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline)
+
+
+definitionTerm :: PandocMonad m => VwParser m Inlines
+definitionTerm = try $ do
+ x <- definitionTerm1 <|> definitionTerm2
+ guard $ (stringify x /= "")
+ return x
+
+definitionTerm1 :: PandocMonad m => VwParser m Inlines
+definitionTerm1 = try $
+ trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE)
+
+definitionTerm2 :: PandocMonad m => VwParser m Inlines
+definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline'
+ (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM))
+
+defMarkerM :: PandocMonad m => VwParser m Char
+defMarkerM = string "::" >> spaceChar
+
+defMarkerE :: PandocMonad m => VwParser m Char
+defMarkerE = string "::" >> newline
+
+hasDefMarkerM :: PandocMonad m => VwParser m String
+hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
+
+preformatted :: PandocMonad m => VwParser m Blocks
+preformatted = try $ do
+ many spaceChar >> string "{{{"
+ attrText <- many (noneOf "\n")
+ lookAhead newline
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
+ >> many spaceChar >> newline))
+ if (not $ contents == "") && (head contents == '\n')
+ then return $ B.codeBlockWith (makeAttr attrText) (tail contents)
+ else return $ B.codeBlockWith (makeAttr attrText) contents
+
+makeAttr :: String -> Attr
+makeAttr s =
+ let xs = splitBy (`elem` " \t") s in
+ ("", [], catMaybes $ map nameValue xs)
+
+nameValue :: String -> Maybe (String, String)
+nameValue s =
+ let t = splitBy (== '=') s in
+ if length t /= 2
+ then Nothing
+ else let (a, b) = (head t, last t) in
+ if ((length b) < 2) || ((head b, last b) /= ('"', '"'))
+ then Nothing
+ else Just (a, stripFirstAndLast b)
+
+
+displayMath :: PandocMonad m => VwParser m Blocks
+displayMath = try $ do
+ many spaceChar >> string "{{$"
+ mathTag <- option "" mathTagParser
+ many space
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$"
+ >> many spaceChar >> newline))
+ let contentsWithTags
+ | mathTag == "" = contents
+ | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents
+ ++ "\n\\end{" ++ mathTag ++ "}"
+ return $ B.para $ B.displayMath contentsWithTags
+
+
+mathTagLaTeX :: String -> String
+mathTagLaTeX s = case s of
+ "equation" -> ""
+ "equation*" -> ""
+ "gather" -> "gathered"
+ "gather*" -> "gathered"
+ "multline" -> "gathered"
+ "multline*" -> "gathered"
+ "eqnarray" -> "aligned"
+ "eqnarray*" -> "aligned"
+ "align" -> "aligned"
+ "align*" -> "aligned"
+ "alignat" -> "aligned"
+ "alignat*" -> "aligned"
+ _ -> s
+
+
+mixedList :: PandocMonad m => VwParser m Blocks
+mixedList = try $ do
+ (bl, _) <- mixedList' (-1)
+ return $ head bl
+
+mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int)
+mixedList' prevInd = do
+ (curInd, builder) <- option (-1, "na") (lookAhead listStart)
+ if curInd < prevInd
+ then return ([], curInd)
+ else do
+ listStart
+ curLine <- listItemContent
+ let listBuilder =
+ if builder == "ul" then B.bulletList else B.orderedList
+ (subList, lowInd) <- (mixedList' curInd)
+ if lowInd >= curInd
+ then do
+ (sameIndList, endInd) <- (mixedList' lowInd)
+ let curList = (combineList curLine subList) ++ sameIndList
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+ else do
+ let (curList, endInd) = ((combineList curLine subList),
+ lowInd)
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+
+plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks
+plainInlineML' w = do
+ xs <- many inlineML
+ newline
+ return $ B.plain $ trimInlines $ mconcat $ w:xs
+
+plainInlineML :: PandocMonad m => VwParser m Blocks
+plainInlineML = (notFollowedBy listStart) >> spaceChar >> plainInlineML' mempty
+
+
+listItemContent :: PandocMonad m => VwParser m Blocks
+listItemContent = try $ do
+ w <- option mempty listTodoMarker
+ x <- plainInlineML' w
+ y <- many blocksThenInline
+ z <- many blockML
+ return $ mconcat $ x:y ++ z
+
+blocksThenInline :: PandocMonad m => VwParser m Blocks
+blocksThenInline = try $ do
+ y <- many1 blockML
+ x <- plainInlineML
+ return $ mconcat $ y ++ [x]
+
+listTodoMarker :: PandocMonad m => VwParser m Inlines
+listTodoMarker = try $ do
+ x <- between (many spaceChar >> char '[') (char ']' >> spaceChar)
+ (oneOf " .oOX")
+ return $ makeListMarkerSpan x
+
+makeListMarkerSpan :: Char -> Inlines
+makeListMarkerSpan x =
+ let cl = case x of
+ ' ' -> "done0"
+ '.' -> "done1"
+ 'o' -> "done2"
+ 'O' -> "done3"
+ 'X' -> "done4"
+ _ -> ""
+ in
+ B.spanWith ("", [cl], []) mempty
+
+combineList :: Blocks -> [Blocks] -> [Blocks]
+combineList x [y] = case toList y of
+ [BulletList z] -> [fromList $ (toList x)
+ ++ [BulletList z]]
+ [OrderedList attr z] -> [fromList $ (toList x)
+ ++ [OrderedList attr z]]
+ _ -> x:[y]
+combineList x xs = x:xs
+
+listStart :: PandocMonad m => VwParser m (Int, String)
+listStart = try $ do
+ s <- many spaceChar
+ listType <- bulletListMarkers <|> orderedListMarkers
+ spaceChar
+ return (length s, listType)
+
+bulletListMarkers :: PandocMonad m => VwParser m String
+bulletListMarkers = "ul" <$ (char '*' <|> char '-')
+
+orderedListMarkers :: PandocMonad m => VwParser m String
+orderedListMarkers =
+ ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen)
+ <$> orderedListMarker
+ <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
+ <|> ("ol" <$ char '#')
+
+--many need trimInlines
+table :: PandocMonad m => VwParser m Blocks
+table = try $ do
+ indent <- lookAhead (many spaceChar)
+ (th, trs) <- table1 <|> table2
+ let tab = B.simpleTable th trs
+ if indent == ""
+ then return tab
+ else return $ B.divWith ("", ["center"], []) tab
+
+-- table with header
+table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table1 = try $ do
+ th <- tableRow
+ many1 tableHeaderSeparator
+ trs <- many tableRow
+ return (th, trs)
+
+-- headerless table
+table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table2 = try $ do
+ trs <- many1 tableRow
+ return (take (length $ head trs) $ repeat mempty, trs)
+
+tableHeaderSeparator :: PandocMonad m => VwParser m ()
+tableHeaderSeparator = try $ do
+ many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|')
+ >> many spaceChar >> newline
+ return ()
+
+tableRow :: PandocMonad m => VwParser m [Blocks]
+tableRow = try $ do
+ many spaceChar >> char '|'
+ s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
+ >> newline))
+ guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
+ tr <- many tableCell
+ many spaceChar >> char '\n'
+ return tr
+
+tableCell :: PandocMonad m => VwParser m Blocks
+tableCell = try $
+ B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|'))
+
+placeholder :: PandocMonad m => VwParser m ()
+placeholder = try $
+ (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh
+
+ph :: PandocMonad m => String -> VwParser m ()
+ph s = try $ do
+ many spaceChar >> (string $ '%':s) >> spaceChar
+ contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline)))
+ --use lookAhead because of placeholder in the whitespace parser
+ let meta' = return $ B.setMeta s contents nullMeta :: F Meta
+ updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' }
+
+noHtmlPh :: PandocMonad m => VwParser m ()
+noHtmlPh = try $
+ () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
+ >> (lookAhead newline))
+
+templatePh :: PandocMonad m => VwParser m ()
+templatePh = try $
+ () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n")
+ >> (lookAhead newline))
+
+-- inline parser
+
+inline :: PandocMonad m => VwParser m Inlines
+inline = choice $ (whitespace endlineP):inlineList
+
+inlineList :: PandocMonad m => [VwParser m Inlines]
+inlineList = [ bareURL
+ , todoMark
+ , str
+ , strong
+ , emph
+ , strikeout
+ , code
+ , link
+ , image
+ , inlineMath
+ , tag
+ , superscript
+ , subscript
+ , special
+ ]
+
+-- inline parser without softbreaks or comment breaks
+inline' :: PandocMonad m => VwParser m Inlines
+inline' = choice $ whitespace':inlineList
+
+-- inline parser for blockquotes
+inlineBQ :: PandocMonad m => VwParser m Inlines
+inlineBQ = choice $ (whitespace endlineBQ):inlineList
+
+-- inline parser for mixedlists
+inlineML :: PandocMonad m => VwParser m Inlines
+inlineML = choice $ (whitespace endlineML):inlineList
+
+str :: PandocMonad m => VwParser m Inlines
+str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars)
+
+whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
+whitespace endline = B.space <$ (skipMany1 spaceChar <|>
+ (try (newline >> (comment <|> placeholder))))
+ <|> B.softbreak <$ endline
+
+whitespace' :: PandocMonad m => VwParser m Inlines
+whitespace' = B.space <$ skipMany1 spaceChar
+
+special :: PandocMonad m => VwParser m Inlines
+special = B.str <$> count 1 (oneOf specialChars)
+
+bareURL :: PandocMonad m => VwParser m Inlines
+bareURL = try $ do
+ (orig, src) <- uri <|> emailAddress
+ return $ B.link src "" (B.str orig)
+
+strong :: PandocMonad m => VwParser m Inlines
+strong = try $ do
+ s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*")
+ guard $ (not $ (head s) `elem` spaceChars)
+ && (not $ (last s) `elem` spaceChars)
+ char '*'
+ contents <- mconcat <$> (manyTill inline' $ char '*'
+ >> notFollowedBy alphaNum)
+ return $ (B.spanWith ((makeId contents), [], []) mempty)
+ <> (B.strong contents)
+
+makeId :: Inlines -> String
+makeId i = concat (stringify <$> (toList i))
+
+emph :: PandocMonad m => VwParser m Inlines
+emph = try $ do
+ s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_")
+ guard $ (not $ (head s) `elem` spaceChars)
+ && (not $ (last s) `elem` spaceChars)
+ char '_'
+ contents <- mconcat <$> (manyTill inline' $ char '_'
+ >> notFollowedBy alphaNum)
+ return $ B.emph contents
+
+strikeout :: PandocMonad m => VwParser m Inlines
+strikeout = try $ do
+ string "~~"
+ contents <- mconcat <$> (many1Till inline' $ string $ "~~")
+ return $ B.strikeout contents
+
+code :: PandocMonad m => VwParser m Inlines
+code = try $ do
+ char '`'
+ contents <- many1Till (noneOf "\n") (char '`')
+ return $ B.code contents
+
+superscript :: PandocMonad m => VwParser m Inlines
+superscript = try $
+ B.superscript <$> mconcat <$> (char '^' >> many1Till inline' (char '^'))
+
+subscript :: PandocMonad m => VwParser m Inlines
+subscript = try $
+ B.subscript <$> mconcat <$> (string ",,"
+ >> many1Till inline' (try $ string ",,"))
+
+link :: PandocMonad m => VwParser m Inlines
+link = try $ do
+ string "[["
+ contents <- lookAhead $ manyTill anyChar (string "]]")
+ case '|' `elem` contents of
+ False -> do
+ manyTill anyChar (string "]]")
+-- not using try here because [[hell]o]] is not rendered as a link in vimwiki
+ return $ B.link (procLink contents) "" (B.str contents)
+ True -> do
+ url <- manyTill anyChar $ char '|'
+ lab <- mconcat <$> (manyTill inline $ string "]]")
+ return $ B.link (procLink url) "" lab
+
+image :: PandocMonad m => VwParser m Inlines
+image = try $ do
+ string "{{"
+ contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}")
+ images $ length $ filter (== '|') contentText
+
+images :: PandocMonad m => Int -> VwParser m Inlines
+images k
+ | k == 0 = do
+ imgurl <- manyTill anyChar (try $ string "}}")
+ return $ B.image (procImgurl imgurl) "" (B.str "")
+ | k == 1 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ (try $ string "}}"))
+ return $ B.image (procImgurl imgurl) "" alt
+ | k == 2 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ char '|')
+ attrText <- manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+ | otherwise = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ char '|')
+ attrText <- manyTill anyChar (char '|')
+ manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+
+procLink' :: String -> String
+procLink' s
+ | ((take 6 s) == "local:") = "file" ++ (drop 5 s)
+ | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html"
+ | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+ "news:", "telnet:" ])
+ = s
+ | s == "" = ""
+ | (last s) == '/' = s
+ | otherwise = s ++ ".html"
+
+procLink :: String -> String
+procLink s = procLink' x ++ y
+ where (x, y) = break (=='#') s
+
+procImgurl :: String -> String
+procImgurl s = if ((take 6 s) == "local:") then "file" ++ (drop 5 s) else s
+
+inlineMath :: PandocMonad m => VwParser m Inlines
+inlineMath = try $ do
+ char '$'
+ contents <- many1Till (noneOf "\n") (char '$')
+ return $ B.math contents
+
+tag :: PandocMonad m => VwParser m Inlines
+tag = try $ do
+ char ':'
+ s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space)))
+ guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
+ let ss = splitBy (==':') s
+ return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss))
+
+todoMark :: PandocMonad m => VwParser m Inlines
+todoMark = try $ do
+ string "TODO:"
+ return $ B.spanWith ("", ["todo"], []) (B.str "TODO:")
+
+-- helper functions and parsers
+endlineP :: PandocMonad m => VwParser m ()
+endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote)
+
+endlineBQ :: PandocMonad m => VwParser m ()
+endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ")
+
+endlineML :: PandocMonad m => VwParser m ()
+endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar)
+
+--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks
+nFBTTBSB :: PandocMonad m => VwParser m ()
+nFBTTBSB =
+ notFollowedBy newline <*
+ notFollowedBy hrule <*
+ notFollowedBy tableRow <*
+ notFollowedBy header <*
+ notFollowedBy listStart <*
+ notFollowedBy preformatted <*
+ notFollowedBy displayMath <*
+ notFollowedBy hasDefMarker
+
+hasDefMarker :: PandocMonad m => VwParser m ()
+hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars))
+
+makeTagSpan' :: String -> Inlines
+makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
+ B.spanWith (s, ["tag"], []) (B.str s)
+
+makeTagSpan :: String -> Inlines
+makeTagSpan s = (B.space) <> (makeTagSpan' s)
+
+mathTagParser :: PandocMonad m => VwParser m String
+mathTagParser = do
+ s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars)
+ (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)))
+ char '%' >> string s >> char '%'
+ return $ mathTagLaTeX s
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 745e809d0..53fd38ffd 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -49,6 +49,7 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
tabFilter,
+ crFilter,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
@@ -117,7 +118,7 @@ import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Error (PandocError(..))
import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data)
-import qualified Control.Monad.State as S
+import qualified Control.Monad.State.Strict as S
import qualified Control.Exception as E
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
@@ -279,13 +280,12 @@ escapeURI = escapeURIString (not . needsEscaping)
where needsEscaping c = isSpace c || c `elem`
['<','>','|','"','{','}','[',']','^', '`']
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
+-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int -- ^ Tab stop
-> T.Text -- ^ Input
-> T.Text
-tabFilter tabStop = T.filter (/= '\r') . T.unlines .
- (if tabStop == 0 then id else map go) . T.lines
+tabFilter 0 = id
+tabFilter tabStop = T.unlines . map go . T.lines
where go s =
let (s1, s2) = T.break (== '\t') s
in if T.null s2
@@ -294,6 +294,10 @@ tabFilter tabStop = T.filter (/= '\r') . T.unlines .
(tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
<> go (T.drop 1 s2)
+-- | Strip out DOS line endings.
+crFilter :: T.Text -> T.Text
+crFilter = T.filter (/= '\r')
+
--
-- Date/time
--
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 9b635a97b..1a26b7168 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -33,20 +33,20 @@ A simple templating system with variable substitution and conditionals.
-}
-module Text.Pandoc.Templates ( renderTemplate
+module Text.Pandoc.Templates ( module Text.DocTemplates
, renderTemplate'
- , TemplateTarget
- , varListToJSON
- , compileTemplate
- , Template
- , getDefaultTemplate ) where
+ , getDefaultTemplate
+ ) where
import qualified Control.Exception as E (IOException, try)
+import Control.Monad.Except (throwError)
import Data.Aeson (ToJSON (..))
import qualified Data.Text as T
import System.FilePath ((<.>), (</>))
import Text.DocTemplates (Template, TemplateTarget, applyTemplate,
compileTemplate, renderTemplate, varListToJSON)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Error
import Text.Pandoc.Shared (readDataFileUTF8)
-- | Get default template for the specified writer.
@@ -72,7 +72,11 @@ getDefaultTemplate user writer = do
_ -> let fname = "templates" </> "default" <.> format
in E.try $ readDataFileUTF8 user fname
--- | Like 'applyTemplate', but raising an error if compilation fails.
-renderTemplate' :: (ToJSON a, TemplateTarget b) => String -> a -> b
-renderTemplate' template = either error id . applyTemplate (T.pack template)
-
+-- | Like 'applyTemplate', but runs in PandocMonad and
+-- raises an error if compilation fails.
+renderTemplate' :: (PandocMonad m, ToJSON a, TemplateTarget b)
+ => String -> a -> m b
+renderTemplate' template context = do
+ case applyTemplate (T.pack template) context of
+ Left e -> throwError (PandocTemplateError e)
+ Right r -> return r
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index dbe55449f..6dfc1a7b3 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -176,19 +176,16 @@ writers = [
,("muse" , TextWriter writeMuse)
]
-getWriter :: PandocMonad m => String -> Either String (Writer m)
+-- | Retrieve writer, extensions based on formatSpec (format+extensions).
+getWriter :: PandocMonad m => String -> Either String (Writer m, Extensions)
getWriter s
= case parseFormatSpec s of
Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e]
Right (writerName, setExts) ->
case lookup writerName writers of
Nothing -> Left $ "Unknown writer: " ++ writerName
- Just (TextWriter r) -> Right $ TextWriter $
- \o -> r o{ writerExtensions = setExts $
- getDefaultExtensions writerName }
- Just (ByteStringWriter r) -> Right $ ByteStringWriter $
- \o -> r o{ writerExtensions = setExts $
- getDefaultExtensions writerName }
+ Just r -> Right (r, setExts $
+ getDefaultExtensions writerName)
writeJSON :: WriterOptions -> Pandoc -> Text
writeJSON _ = UTF8.toText . BL.toStrict . encode
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 46dbe6eaf..112f8b657 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -37,7 +37,7 @@ that it has omitted the construct.
AsciiDoc: <http://www.methods.co.nz/asciidoc/>
-}
module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Aeson (Result (..), Value (String), fromJSON, toJSON)
import Data.Char (isPunctuation, isSpace)
import Data.List (intercalate, intersperse, stripPrefix)
@@ -105,7 +105,7 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
$ metadata'
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape special characters for AsciiDoc.
escapeString :: String -> String
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index ed316ced9..63249a7ce 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -32,7 +32,7 @@ CommonMark: <http://commonmark.org>
module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
import CMark
-import Control.Monad.State (State, get, modify, runState)
+import Control.Monad.State.Strict (State, get, modify, runState)
import Data.Foldable (foldrM)
import Data.Text (Text)
import qualified Data.Text as T
@@ -58,9 +58,9 @@ writeCommonMark opts (Pandoc meta blocks) = do
(inlinesToCommonMark opts)
meta
let context = defField "body" main $ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
processNotes :: Inline -> State [[Block]] Inline
processNotes (Note bs) = do
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 2da6a7f9a..3c901cab6 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2007-2017 John MacFarlane <jgm@berkeley.edu>
@@ -29,12 +30,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' format into ConTeXt.
-}
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (ord)
import Data.List (intercalate, intersperse)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Network.URI (unEscapeString)
+import Text.Pandoc.BCP47
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging
import Text.Pandoc.Definition
@@ -88,6 +90,7 @@ pandocToConTeXt options (Pandoc meta blocks) = do
,("top","margin-top")
,("bottom","margin-bottom")
]
+ mblang <- fromBCP47 (getLang options meta)
let context = defField "toc" (writerTableOfContents options)
$ defField "placelist" (intercalate ("," :: String) $
take (writerTOCDepth options +
@@ -100,14 +103,17 @@ pandocToConTeXt options (Pandoc meta blocks) = do
$ defField "body" main
$ defField "layout" layoutFromMargins
$ defField "number-sections" (writerNumberSections options)
+ $ maybe id (defField "context-lang") mblang
+ $ (case getField "papersize" metadata of
+ Just ("a4" :: String) -> resetField "papersize"
+ ("A4" :: String)
+ _ -> id)
$ metadata
- let context' = defField "context-lang" (maybe "" (fromBcp47 . splitBy (=='-')) $
- getField "lang" context)
- $ defField "context-dir" (toContextDir $ getField "dir" context)
- $ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
+ let context' = defField "context-dir" (toContextDir
+ $ getField "dir" context) context
+ case writerTemplate options of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context'
toContextDir :: Maybe String -> String
toContextDir (Just "rtl") = "r2l"
@@ -186,6 +192,7 @@ blockToConTeXt b@(RawBlock _ _ ) = do
return empty
blockToConTeXt (Div (ident,_,kvs) bs) = do
let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
+ mblang <- fromBCP47 (lookup "lang" kvs)
let wrapRef txt = if null ident
then txt
else ("\\reference" <> brackets (text $ toLabel ident) <>
@@ -194,9 +201,9 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do
Just "rtl" -> align "righttoleft"
Just "ltr" -> align "lefttoright"
_ -> id
- wrapLang txt = case lookup "lang" kvs of
+ wrapLang txt = case mblang of
Just lng -> "\\start\\language["
- <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
+ <> text lng <> "]" $$ txt $$ "\\stop"
Nothing -> txt
wrapBlank txt = blankline <> txt <> blankline
fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
@@ -416,12 +423,13 @@ inlineToConTeXt (Note contents) = do
else text "\\startbuffer " <> nest 2 contents' <>
text "\\stopbuffer\\footnote{\\getbuffer}"
inlineToConTeXt (Span (_,_,kvs) ils) = do
+ mblang <- fromBCP47 (lookup "lang" kvs)
let wrapDir txt = case lookup "dir" kvs of
Just "rtl" -> braces $ "\\righttoleft " <> txt
Just "ltr" -> braces $ "\\lefttoright " <> txt
_ -> txt
- wrapLang txt = case lookup "lang" kvs of
- Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
+ wrapLang txt = case mblang of
+ Just lng -> "\\start\\language[" <> text lng
<> "]" <> txt <> "\\stop "
Nothing -> txt
fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
@@ -458,36 +466,34 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
<> blankline
_ -> contents <> blankline
-fromBcp47' :: String -> String
-fromBcp47' = fromBcp47 . splitBy (=='-')
+fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String)
+fromBCP47 mbs = fromBCP47' <$> toLang mbs
-- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1
-- http://wiki.contextgarden.net/Language_Codes
-fromBcp47 :: [String] -> String
-fromBcp47 [] = ""
-fromBcp47 ("ar":"SY":_) = "ar-sy"
-fromBcp47 ("ar":"IQ":_) = "ar-iq"
-fromBcp47 ("ar":"JO":_) = "ar-jo"
-fromBcp47 ("ar":"LB":_) = "ar-lb"
-fromBcp47 ("ar":"DZ":_) = "ar-dz"
-fromBcp47 ("ar":"MA":_) = "ar-ma"
-fromBcp47 ("de":"1901":_) = "deo"
-fromBcp47 ("de":"DE":_) = "de-de"
-fromBcp47 ("de":"AT":_) = "de-at"
-fromBcp47 ("de":"CH":_) = "de-ch"
-fromBcp47 ("el":"poly":_) = "agr"
-fromBcp47 ("en":"US":_) = "en-us"
-fromBcp47 ("en":"GB":_) = "en-gb"
-fromBcp47 ("grc":_) = "agr"
-fromBcp47 x = fromIso $ head x
- where
- fromIso "el" = "gr"
- fromIso "eu" = "ba"
- fromIso "he" = "il"
- fromIso "jp" = "ja"
- fromIso "uk" = "ua"
- fromIso "vi" = "vn"
- fromIso "zh" = "cn"
- fromIso l = l
+fromBCP47' :: Maybe Lang -> Maybe String
+fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
+fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
+fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn"
+fromBCP47' (Just (Lang l _ _ _) ) = Just l
+fromBCP47' Nothing = Nothing
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 1314ef844..363bad99b 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -46,6 +46,7 @@ import Data.Typeable
import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8)
import Scripting.Lua (LuaState, StackValue, callfunc)
import qualified Scripting.Lua as Lua
+import Text.Pandoc.Error
import Text.Pandoc.Lua.Compat ( loadstring )
import Text.Pandoc.Lua.Util ( addValue )
import Text.Pandoc.Lua.SharedInstances ()
@@ -141,8 +142,10 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do
let body = rendered
case writerTemplate opts of
Nothing -> return $ pack body
- Just tpl -> return $ pack $
- renderTemplate' tpl $ setField "body" body context
+ Just tpl ->
+ case applyTemplate (pack tpl) $ setField "body" body context of
+ Left e -> throw (PandocTemplateError e)
+ Right r -> return (pack r)
docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO String
docToCustom lua opts (Pandoc (Meta metamap) blocks) = do
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 02ffbf831..9db9a0102 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -124,9 +124,9 @@ writeDocbook opts (Pandoc meta blocks) = do
MathML -> True
_ -> False)
$ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to Docbook.
elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
@@ -217,8 +217,10 @@ blockToDocbook opts (Div (ident,_,_) bs) = do
(if null ident
then mempty
else selfClosingTag "anchor" [("id", ident)]) $$ contents
-blockToDocbook _ (Header _ _ _) =
- return empty -- should not occur after hierarchicalize
+blockToDocbook _ h@(Header _ _ _) = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return empty
blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
-- title beginning with fig: indicates that the image is a figure
blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 63bb8a5ae..fb6b2013a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -37,7 +37,7 @@ import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except (catchError)
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -68,6 +68,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared (fixDisplayMath)
+import Text.Pandoc.BCP47 (getLang, renderLang, toLang)
import Text.Printf (printf)
import Text.TeXMath
import Text.XML.Light as XML
@@ -257,12 +258,11 @@ writeDocx opts doc@(Pandoc meta _) = do
)
-- styles
- let lang = case lookupMeta "lang" meta of
- Just (MetaInlines [Str s]) -> Just s
- Just (MetaString s) -> Just s
- _ -> Nothing
+ mblang <- toLang $ getLang opts meta
let addLang :: Element -> Element
- addLang e = case lang >>= \l -> (return . XMLC.toTree . go l . XMLC.fromElement) e of
+ addLang e = case mblang >>= \l ->
+ (return . XMLC.toTree . go (renderLang l)
+ . XMLC.fromElement) e of
Just (Elem e') -> e'
_ -> e -- return original
where go :: String -> Cursor -> Cursor
@@ -657,6 +657,9 @@ mkNumbering lists = do
elts <- mapM mkAbstractNum (ordNub lists)
return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
+maxListLevel :: Int
+maxListLevel = 8
+
mkNum :: ListMarker -> Int -> Element
mkNum marker numid =
mknode "w:num" [("w:numId",show numid)]
@@ -666,7 +669,8 @@ mkNum marker numid =
BulletMarker -> []
NumberMarker _ _ start ->
map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
- $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
+ $ mknode "w:startOverride" [("w:val",show start)] ())
+ [0..maxListLevel]
mkAbstractNum :: (PandocMonad m) => ListMarker -> m Element
mkAbstractNum marker = do
@@ -675,7 +679,8 @@ mkAbstractNum marker = do
return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
$ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
: mknode "w:multiLevelType" [("w:val","multilevel")] ()
- : map (mkLvl marker) [0..6]
+ : map (mkLvl marker)
+ [0..maxListLevel]
mkLvl :: ListMarker -> Int -> Element
mkLvl marker lvl =
@@ -706,7 +711,7 @@ mkLvl marker lvl =
bulletFor 3 = "\x2013"
bulletFor 4 = "\x2022"
bulletFor 5 = "\x2013"
- bulletFor _ = "\x2022"
+ bulletFor x = bulletFor (x `mod` 6)
styleFor UpperAlpha _ = "upperLetter"
styleFor LowerAlpha _ = "lowerLetter"
styleFor UpperRoman _ = "upperRoman"
@@ -718,6 +723,7 @@ mkLvl marker lvl =
styleFor DefaultStyle 4 = "decimal"
styleFor DefaultStyle 5 = "lowerLetter"
styleFor DefaultStyle 6 = "lowerRoman"
+ styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 7)
styleFor _ _ = "decimal"
patternFor OneParen s = s ++ ")"
patternFor TwoParens s = "(" ++ s ++ ")"
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 551a1b0b5..ad8689e8c 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -41,7 +41,7 @@ DokuWiki: <https://www.dokuwiki.org/dokuwiki>
module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where
import Control.Monad (zipWithM)
import Control.Monad.Reader (ReaderT, ask, local, runReaderT)
-import Control.Monad.State (StateT, evalStateT, gets, modify)
+import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
import Data.Default (Default (..))
import Data.List (intercalate, intersect, isPrefixOf, transpose)
import Data.Text (Text, pack)
@@ -103,7 +103,7 @@ pandocToDokuWiki opts (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape special characters for DokuWiki.
escapeString :: String -> String
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index d68283007..a48fcf415 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -34,14 +34,14 @@ Conversion of 'Pandoc' documents to EPUB.
module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive,
fromArchive, fromEntry, toEntry)
-import Control.Monad (mplus, when, zipWithM)
+import Control.Monad (mplus, when, unless, zipWithM)
import Control.Monad.Except (catchError, throwError)
-import Control.Monad.State (State, StateT, evalState, evalStateT, get, gets,
+import Control.Monad.State.Strict (State, StateT, evalState, evalStateT, get, gets,
lift, modify, put)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Data.Text.Lazy as TL
-import Data.Char (isAlphaNum, isDigit, toLower)
+import Data.Char (isAlphaNum, isDigit, toLower, isAscii)
import Data.List (intercalate, isInfixOf, isPrefixOf)
import qualified Data.Map as M
import Data.Maybe (catMaybes, fromMaybe)
@@ -103,6 +103,7 @@ data EPUBMetadata = EPUBMetadata{
, epubCoverImage :: Maybe String
, epubStylesheets :: [FilePath]
, epubPageDirection :: Maybe ProgressionDirection
+ , epubIbooksFields :: [(String, String)]
} deriving Show
data Date = Date{
@@ -312,6 +313,7 @@ metadataFromMeta opts meta = EPUBMetadata{
, epubCoverImage = coverImage
, epubStylesheets = stylesheets
, epubPageDirection = pageDirection
+ , epubIbooksFields = ibooksFields
}
where identifiers = getIdentifier meta
titles = getTitle meta
@@ -339,6 +341,10 @@ metadataFromMeta opts meta = EPUBMetadata{
Just "ltr" -> Just LTR
Just "rtl" -> Just RTL
_ -> Nothing
+ ibooksFields = case lookupMeta "ibooks" meta of
+ Just (MetaMap mp)
+ -> M.toList $ M.map metaValueToString mp
+ _ -> []
-- | Produce an EPUB2 file from a Pandoc document.
writeEPUB2 :: PandocMonad m
@@ -361,8 +367,7 @@ writeEPUB :: PandocMonad m
-> Pandoc -- ^ Document to convert
-> m B.ByteString
writeEPUB epubVersion opts doc =
- let initState = EPUBState { stMediaPaths = []
- }
+ let initState = EPUBState { stMediaPaths = [] }
in
evalStateT (pandocToEPUB epubVersion opts doc)
initState
@@ -373,6 +378,10 @@ pandocToEPUB :: PandocMonad m
-> Pandoc
-> E m B.ByteString
pandocToEPUB version opts doc@(Pandoc meta _) = do
+ let epubSubdir = writerEpubSubdirectory opts
+ -- sanity check on epubSubdir
+ unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+ throwError $ PandocEpubSubdirectoryError epubSubdir
let epub3 = version == EPUB3
let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) .
writeHtmlStringForEPUB version o
@@ -383,14 +392,15 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
-- stylesheet
stylesheets <- case epubStylesheets metadata of
[] -> (\x -> [B.fromChunks [x]]) <$>
- P.readDataFile (writerUserDataDir opts) "epub.css"
+ P.readDataFile (writerUserDataDir opts)
+ "epub.css"
fs -> mapM P.readFileLazy fs
let stylesheetEntries = zipWith
- (\bs n -> mkEntry ("stylesheet" ++ show n ++ ".css") bs)
+ (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs)
stylesheets [(1 :: Int)..]
let vars = ("epub3", if epub3 then "true" else "false")
- : map (\e -> ("css", eRelativePath e)) stylesheetEntries
+ : map (\e -> ("css", "../" ++ eRelativePath e)) stylesheetEntries
++ [(x,y) | (x,y) <- writerVariables opts, x /= "css"]
let opts' = opts{ writerEmailObfuscation = NoObfuscation
, writerSectionDivs = True
@@ -418,7 +428,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
tpContent <- lift $ writeHtml opts'{
writerVariables = ("titlepage","true"):vars }
(Pandoc meta [])
- let tpEntry = mkEntry "title_page.xhtml" tpContent
+ let tpEntry = mkEntry "text/title_page.xhtml" tpContent
-- handle pictures
-- mediaRef <- P.newIORef []
@@ -431,7 +441,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
when (null xs) $
report $ CouldNotFetchResource f "glob did not match any font files"
return xs
- let mkFontEntry f = mkEntry (takeFileName f) `fmap` (lift $ P.readFileLazy f)
+ let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) <$>
+ lift (P.readFileLazy f)
fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
fontEntries <- mapM mkFontEntry fontFiles
@@ -516,7 +527,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
chapters'
let chapToEntry num (Chapter mbnum bs) =
- mkEntry (showChapter num) <$>
+ mkEntry ("text/" ++ showChapter num) <$>
(writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum }
$ case bs of
(Header _ _ xs : _) ->
@@ -572,7 +583,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
EPUB2 -> "2.0"
EPUB3 -> "3.0")
,("xmlns","http://www.idpf.org/2007/opf")
- ,("unique-identifier","epub-id-1")] $
+ ,("unique-identifier","epub-id-1")
+ ,("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/")] $
[ metadataElement version metadata currentTime
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
@@ -648,12 +660,12 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
navMapFormatter n tit src subs = unode "navPoint" !
[("id", "navPoint-" ++ show n)] $
[ unode "navLabel" $ unode "text" tit
- , unode "content" ! [("src", src)] $ ()
+ , unode "content" ! [("src", "text/" ++ src)] $ ()
] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
[ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
- , unode "content" ! [("src","title_page.xhtml")] $ () ]
+ , unode "content" ! [("src","text/title_page.xhtml")] $ () ]
navMap <- lift $ evalStateT (mapM (navPointNode navMapFormatter) secs) 1
let tocData = UTF8.fromStringLazy $ ppTopElement $
@@ -681,7 +693,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
navXhtmlFormatter n tit src subs = unode "li" !
[("id", "toc-li-" ++ show n)] $
- (unode "a" ! [("href",src)]
+ (unode "a" ! [("href", "text/" ++
+ src)]
$ tit)
: case subs of
[] -> []
@@ -714,7 +727,11 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
]
]
else []
- navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):vars }
+ navData <- lift $ writeHtml opts'{ writerVariables = ("navpage","true"):
+ -- remove the leading ../ from stylesheet paths:
+ map (\(k,v) -> if k == "css"
+ then (k, drop 3 v)
+ else (k, v)) vars }
(Pandoc (setMeta "title"
(walk removeNote $ fromList $ docTitle' meta) nullMeta)
(navBlocks ++ landmarks))
@@ -728,7 +745,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
- unode "rootfile" ! [("full-path","content.opf")
+ unode "rootfile" ! [("full-path",
+ epubSubdir ++ ['/' | not (null epubSubdir)] ++ "content.opf")
,("media-type","application/oebps-package+xml")] $ ()
let containerEntry = mkEntry "META-INF/container.xml" containerData
@@ -739,10 +757,14 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
unode "option" ! [("name","specified-fonts")] $ "true"
let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+ let addEpubSubdir :: Entry -> Entry
+ addEpubSubdir e = e{ eRelativePath =
+ epubSubdir ++ ['/' | not (null epubSubdir)] ++ eRelativePath e }
-- construct archive
- let archive = foldr addEntryToArchive emptyArchive
- (mimetypeEntry : containerEntry : appleEntry : tpEntry :
- contentsEntry : tocEntry : navEntry :
+ let archive = foldr addEntryToArchive emptyArchive $
+ [mimetypeEntry, containerEntry, appleEntry] ++
+ map addEpubSubdir
+ (tpEntry : contentsEntry : tocEntry : navEntry :
(stylesheetEntries ++ picEntries ++ cpicEntry ++
cpgEntry ++ chapterEntries ++ fontEntries))
return $ fromArchive archive
@@ -751,7 +773,8 @@ metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
metadataElement version md currentTime =
unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
- where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
+ where mdNodes = identifierNodes ++ titleNodes ++ dateNodes
+ ++ languageNodes ++ ibooksNodes
++ creatorNodes ++ contributorNodes ++ subjectNodes
++ descriptionNodes ++ typeNodes ++ formatNodes
++ publisherNodes ++ sourceNodes ++ relationNodes
@@ -770,6 +793,8 @@ metadataElement version md currentTime =
[] -> []
(x:_) -> [dcNode "date" ! [("id","epub-date")]
$ dateText x]
+ ibooksNodes = map ibooksNode (epubIbooksFields md)
+ ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
languageNodes = [dcTag "language" $ epubLanguage md]
creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
epubCreator md
@@ -883,10 +908,10 @@ modifyMediaRef opts oldsrc = do
Nothing -> catchError
(do (img, mbMime) <- P.fetchItem (writerSourceURL opts) oldsrc
let new = "media/file" ++ show (length media) ++
- fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
- (('.':) <$> (mbMime >>= extensionFromMimeType))
+ fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
+ (('.':) <$> (mbMime >>= extensionFromMimeType))
epochtime <- floor `fmap` lift P.getPOSIXTime
- let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
+ let entry = toEntry new epochtime (B.fromChunks . (:[]) $ img)
modify $ \st -> st{ stMediaPaths =
(oldsrc, (new, Just entry)):media}
return new)
@@ -913,12 +938,13 @@ transformInline :: PandocMonad m
-> E m Inline
transformInline opts (Image attr lab (src,tit)) = do
newsrc <- modifyMediaRef opts src
- return $ Image attr lab (newsrc, tit)
+ return $ Image attr lab ("../" ++ newsrc, tit)
transformInline opts (x@(Math t m))
| WebTeX url <- writerHTMLMathMethod opts = do
newsrc <- modifyMediaRef opts (url ++ urlEncode m)
let mathclass = if t == DisplayMath then "display" else "inline"
- return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] (newsrc, "")]
+ return $ Span ("",["math",mathclass],[])
+ [Image nullAttr [x] ("../" ++ newsrc, "")]
transformInline opts (RawInline fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 213756330..4c764d987 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -37,9 +37,9 @@ FictionBook is an XML-based e-book format. For more information see:
-}
module Text.Pandoc.Writers.FB2 (writeFB2) where
-import Control.Monad.Except (catchError, throwError)
-import Control.Monad.State (StateT, evalStateT, get, lift, modify)
-import Control.Monad.State (liftM)
+import Control.Monad.Except (catchError)
+import Control.Monad.State.Strict (StateT, evalStateT, get, lift, modify)
+import Control.Monad.State.Strict (liftM)
import Data.ByteString.Base64 (encode)
import qualified Data.ByteString.Char8 as B8
import Data.Char (isAscii, isControl, isSpace, toLower)
@@ -54,7 +54,6 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Class (PandocMonad, report)
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Definition
-import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara,
@@ -371,8 +370,10 @@ blockToXml (DefinitionList defs) =
needsBreak (Para _) = False
needsBreak (Plain ins) = LineBreak `notElem` ins
needsBreak _ = True
-blockToXml (Header _ _ _) = -- should never happen, see renderSections
- throwError $ PandocShouldNeverHappenError "unexpected header in section text"
+blockToXml h@(Header _ _ _) = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return []
blockToXml HorizontalRule = return
[ el "empty-line" ()
, el "p" (txt (replicate 10 '—'))
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 5ee8ab4ce..451123a6d 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -43,7 +43,7 @@ module Text.Pandoc.Writers.HTML (
writeDZSlides,
writeRevealJs
) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (ord, toLower)
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
@@ -210,7 +210,7 @@ writeHtmlString' st opts d = do
lookup "sourcefile" (writerVariables opts)
report $ NoTitleElement fallback
return $ resetField "pagetitle" fallback context
- return $ renderTemplate' tpl $
+ renderTemplate' tpl $
defField "body" (renderHtml' body) context'
writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
@@ -241,7 +241,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
then blocks
else prepSlides slideLevel blocks
toc <- if writerTableOfContents opts && slideVariant /= S5Slides
- then tableOfContents opts sects
+ then fmap renderHtml' <$> tableOfContents opts sects
else return Nothing
blocks' <- liftM (mconcat . intersperse (nl opts)) $
mapM (elementToHtml slideLevel opts) sects
@@ -253,7 +253,9 @@ pandocToHtml opts (Pandoc meta blocks) = do
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
$ mempty
- MathJax url ->
+ MathJax url
+ | slideVariant /= RevealJsSlides ->
+ -- mathjax is handled via a special plugin in revealjs
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
$ case slideVariant of
@@ -285,8 +287,16 @@ pandocToHtml opts (Pandoc meta blocks) = do
(if stMath st
then defField "math" (renderHtml' math)
else id) $
+ defField "mathjax"
+ (case writerHTMLMathMethod opts of
+ MathJax _ -> True
+ _ -> False) $
defField "quotes" (stQuotes st) $
- maybe id (defField "toc" . renderHtml') toc $
+ -- for backwards compatibility we populate toc
+ -- with the contents of the toc, rather than a
+ -- boolean:
+ maybe id (defField "toc") toc $
+ maybe id (defField "table-of-contents") toc $
defField "author-meta" authsMeta $
maybe id (defField "date-meta") (normalizeDate dateMeta) $
defField "pagetitle" (stringifyHTML (docTitle meta)) $
@@ -597,7 +607,8 @@ blockToHtml opts (Para lst)
contents <- inlineListToHtml opts lst
return $ H.p contents
where
- isEmptyRaw [RawInline f _] = f /= (Format "html")
+ isEmptyRaw [RawInline f _] = f `notElem` [Format "html",
+ Format "html4", Format "html5"]
isEmptyRaw _ = False
blockToHtml opts (LineBlock lns) =
if writerWrapText opts == WrapNone
@@ -626,14 +637,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do
NoSlides -> addAttrs opts' attr $ H.div $ contents'
_ -> mempty
else addAttrs opts (ident, classes', kvs) $ divtag $ contents'
-blockToHtml opts (RawBlock f str)
- | f == Format "html" = return $ preEscapedString str
- | (f == Format "latex" || f == Format "tex") &&
- allowsMathEnvironments (writerHTMLMathMethod opts) &&
- isMathEnvironment str = blockToHtml opts $ Plain [Math DisplayMath str]
- | otherwise = do
- report $ BlockNotRendered (RawBlock f str)
- return mempty
+blockToHtml opts (RawBlock f str) = do
+ ishtml <- isRawHtml f
+ if ishtml
+ then return $ preEscapedString str
+ else if (f == Format "latex" || f == Format "tex") &&
+ allowsMathEnvironments (writerHTMLMathMethod opts) &&
+ isMathEnvironment str
+ then blockToHtml opts $ Plain [Math DisplayMath str]
+ else do
+ report $ BlockNotRendered (RawBlock f str)
+ return mempty
blockToHtml _ (HorizontalRule) = do
html5 <- gets stHtml5
return $ if html5 then H5.hr else H.hr
@@ -971,11 +985,13 @@ inlineToHtml opts inline = do
return $ case t of
InlineMath -> m
DisplayMath -> brtag >> m >> brtag
- (RawInline f str)
- | f == Format "html" -> return $ preEscapedString str
- | otherwise -> do
- report $ InlineNotRendered inline
- return mempty
+ (RawInline f str) -> do
+ ishtml <- isRawHtml f
+ if ishtml
+ then return $ preEscapedString str
+ else do
+ report $ InlineNotRendered inline
+ return mempty
(Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
lift $ obfuscateLink opts attr linkText s
@@ -1123,3 +1139,9 @@ allowsMathEnvironments (MathJax _) = True
allowsMathEnvironments (MathML) = True
allowsMathEnvironments (WebTeX _) = True
allowsMathEnvironments _ = False
+
+isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool
+isRawHtml f = do
+ html5 <- gets stHtml5
+ return $ f == Format "html" ||
+ ((html5 && f == Format "html5") || f == Format "html4")
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 1ad9acd40..d1146ca73 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to haddock markup.
Haddock: <http://www.haskell.org/haddock/doc/html/>
-}
module Text.Pandoc.Writers.Haddock (writeHaddock) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Default
import Data.Text (Text)
import Data.List (intersperse, transpose)
@@ -80,7 +80,7 @@ pandocToHaddock opts (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return haddock representation of notes.
notesToHaddock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index 2884bc532..37df58e65 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -17,7 +17,7 @@ into InDesign with File -> Place.
-}
module Text.Pandoc.Writers.ICML (writeICML) where
import Control.Monad.Except (catchError)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix)
import qualified Data.Set as Set
import Data.Text as Text (breakOnAll, pack)
@@ -147,9 +147,9 @@ writeICML opts (Pandoc meta blocks) = do
$ defField "parStyles" (render' $ parStylesToDoc st)
$ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
$ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Auxilary functions for parStylesToDoc and charStylesToDoc.
contains :: String -> (String, (String, String)) -> [(String, String)]
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 1a8d80747..012ff8416 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -128,9 +128,9 @@ docToJATS opts (Pandoc meta blocks) = do
MathML -> True
_ -> False)
$ metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to JATS.
elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
@@ -203,8 +203,10 @@ blockToJATS opts (Div (ident,_,kvs) bs) = do
[(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
"content-type", "orientation", "position"]]
return $ inTags True "boxed-text" attr contents
-blockToJATS _ (Header _ _ _) =
- return empty -- should not occur after hierarchicalize
+blockToJATS _ h@(Header _ _ _) = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return empty
-- No Plain, everything needs to be in a block-level tag
blockToJATS opts (Plain lst) = blockToJATS opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 80606d510..55ecda819 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -35,16 +35,17 @@ module Text.Pandoc.Writers.LaTeX (
, writeBeamer
) where
import Control.Applicative ((<|>))
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Aeson (FromJSON, object, (.=))
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
toLower)
-import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy,
+import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
stripPrefix, (\\))
import Data.Maybe (catMaybes, fromMaybe, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import Network.URI (unEscapeString)
+import Text.Pandoc.BCP47 (Lang(..), toLang, getLang, renderLang)
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
@@ -188,7 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
- let docLangs = nub $ query (extract "lang") blocks
+ docLangs <- catMaybes <$>
+ mapM (toLang . Just) (ordNub (query (extract "lang") blocks))
let hasStringValue x = isJust (getField x metadata :: Maybe String)
let geometryFromMargins = intercalate [','] $ catMaybes $
map (\(x,y) ->
@@ -198,6 +200,18 @@ pandocToLaTeX options (Pandoc meta blocks) = do
,("tmargin","margin-top")
,("bmargin","margin-bottom")
]
+ let toPolyObj lang = object [ "name" .= T.pack name
+ , "options" .= T.pack opts ]
+ where
+ (name, opts) = toPolyglossia lang
+ mblang <- toLang $ case getLang options meta of
+ Just l -> Just l
+ Nothing | null docLangs -> Nothing
+ | otherwise -> Just "en"
+ -- we need a default here since lang is used in template conditionals
+
+ let dirs = query (extract "dir") blocks
+
let context = defField "toc" (writerTableOfContents options) $
defField "toc-depth" (show (writerTOCDepth options -
if stBook st
@@ -235,26 +249,24 @@ pandocToLaTeX options (Pandoc meta blocks) = do
Biblatex -> defField "biblio-title" biblioTitle .
defField "biblatex" True
_ -> id) $
- -- set lang to something so polyglossia/babel is included
- defField "lang" (if null docLangs then ""::String else "en") $
- defField "otherlangs" docLangs $
defField "colorlinks" (any hasStringValue
["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
- defField "dir" (if (null $ query (extract "dir") blocks)
- then ""::String
- else "ltr") $
+ (if null dirs
+ then id
+ else defField "dir" ("ltr" :: String)) $
defField "section-titles" True $
defField "geometry" geometryFromMargins $
+ (case getField "papersize" metadata of
+ Just ("A4" :: String) -> resetField "papersize"
+ ("a4" :: String)
+ _ -> id) $
metadata
- let toPolyObj lang = object [ "name" .= T.pack name
- , "options" .= T.pack opts ]
- where
- (name, opts) = toPolyglossia lang
- let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
- otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
let context' =
- defField "babel-lang" (toBabel lang)
- $ defField "babel-otherlangs" (map toBabel otherlangs)
+ -- note: lang is used in some conditionals in the template,
+ -- so we need to set it if we have any babel/polyglossia:
+ maybe id (defField "lang" . renderLang) mblang
+ $ maybe id (defField "babel-lang" . toBabel) mblang
+ $ defField "babel-otherlangs" (map toBabel docLangs)
$ defField "babel-newcommands" (concatMap (\(poly, babel) ->
-- \textspanish and \textgalician are already used by babel
-- save them as \oritext... and let babel use that
@@ -274,20 +286,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do
-- eliminate duplicates that have same polyglossia name
$ nubBy (\a b -> fst a == fst b)
-- find polyglossia and babel names of languages used in the document
- $ map (\l ->
- let lng = splitBy (=='-') l
- in (fst $ toPolyglossia lng, toBabel lng)
- )
- docLangs )
- $ defField "polyglossia-lang" (toPolyObj lang)
- $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
- $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
- Just "rtl" -> True
- _ -> False)
+ $ map (\l -> (fst $ toPolyglossia l, toBabel l)) docLangs
+ )
+ $ maybe id (defField "polyglossia-lang" . toPolyObj) mblang
+ $ defField "polyglossia-otherlangs" (map toPolyObj docLangs)
+ $ defField "latex-dir-rtl"
+ (getField "dir" context == Just ("rtl" :: String))
$ context
- return $ case writerTemplate options of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context'
+ case writerTemplate options of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context'
-- | Convert Elements to LaTeX
elementToLaTeX :: PandocMonad m => WriterOptions -> Element -> LW m Doc
@@ -443,11 +451,12 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
-> "\\leavevmode" <> linkAnchor' <> "%"
_ -> linkAnchor'
let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+ lang <- toLang $ lookup "lang" kvs
let wrapDir = case lookup "dir" kvs of
Just "rtl" -> align "RTL"
Just "ltr" -> align "LTR"
_ -> id
- wrapLang txt = case lookup "lang" kvs of
+ wrapLang txt = case lang of
Just lng -> let (l, o) = toPolyglossiaEnv lng
ops = if null o
then ""
@@ -647,23 +656,25 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
modify $ \s -> s{stInHeading = False}
return hdr
blockToLaTeX (Table caption aligns widths heads rows) = do
- headers <- if all null heads
- then return empty
- else do
- contents <- (tableRowToLaTeX True aligns widths) heads
- return ("\\toprule" $$ contents $$ "\\midrule")
- let endhead = if all null heads
- then empty
- else text "\\endhead"
- let endfirsthead = if all null heads
- then empty
- else text "\\endfirsthead"
+ let toHeaders hs = do contents <- (tableRowToLaTeX True aligns widths) hs
+ return ("\\toprule" $$ contents $$ "\\midrule")
+ let removeNote (Note _) = Span ("", [], []) []
+ removeNote x = x
captionText <- inlineListToLaTeX caption
+ firsthead <- if isEmpty captionText || all null heads
+ then return empty
+ else ($$ text "\\endfirsthead") <$> toHeaders heads
+ head' <- if all null heads
+ then return empty
+ -- avoid duplicate notes in head and firsthead:
+ else ($$ text "\\endhead") <$>
+ toHeaders (if isEmpty firsthead
+ then heads
+ else walk removeNote heads)
let capt = if isEmpty captionText
then empty
- else text "\\caption" <> braces captionText <> "\\tabularnewline"
- $$ headers
- $$ endfirsthead
+ else text "\\caption" <>
+ braces captionText <> "\\tabularnewline"
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
let colDescriptors = text $ concat $ map toColDescriptor aligns
modify $ \s -> s{ stTable = True }
@@ -671,9 +682,9 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
$$ capt
+ $$ firsthead
$$ (if all null heads then "\\toprule" else empty)
- $$ headers
- $$ endhead
+ $$ head'
$$ vcat rows'
$$ "\\bottomrule"
$$ "\\end{longtable}"
@@ -916,13 +927,14 @@ inlineToLaTeX :: PandocMonad m
-> LW m Doc
inlineToLaTeX (Span (id',classes,kvs) ils) = do
linkAnchor <- hypertarget False id' empty
+ lang <- toLang $ lookup "lang" kvs
let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
["textnormal" | "csl-no-strong" `elem` classes ||
"csl-no-smallcaps" `elem` classes] ++
["RL" | ("dir", "rtl") `elem` kvs] ++
["LR" | ("dir", "ltr") `elem` kvs] ++
- (case lookup "lang" kvs of
- Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng
+ (case lang of
+ Just lng -> let (l, o) = toPolyglossia lng
ops = if null o then "" else ("[" ++ o ++ "]")
in ["text" ++ l ++ ops]
Nothing -> [])
@@ -1237,9 +1249,9 @@ mbBraced x = if not (all isAlphaNum x)
-- Extract a key from divs and spans
extract :: String -> Block -> [String]
extract key (Div attr _) = lookKey key attr
-extract key (Plain ils) = concatMap (extractInline key) ils
-extract key (Para ils) = concatMap (extractInline key) ils
-extract key (Header _ _ ils) = concatMap (extractInline key) ils
+extract key (Plain ils) = query (extractInline key) ils
+extract key (Para ils) = query (extractInline key) ils
+extract key (Header _ _ ils) = query (extractInline key) ils
extract _ _ = []
-- Extract a key from spans
@@ -1252,85 +1264,95 @@ lookKey :: String -> Attr -> [String]
lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
-- In environments \Arabic instead of \arabic is used
-toPolyglossiaEnv :: String -> (String, String)
+toPolyglossiaEnv :: Lang -> (String, String)
toPolyglossiaEnv l =
- case toPolyglossia $ (splitBy (=='-')) l of
+ case toPolyglossia l of
("arabic", o) -> ("Arabic", o)
x -> x
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Polyglossia (language, options) tuple
-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
-toPolyglossia :: [String] -> (String, String)
-toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria")
-toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya")
-toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco")
-toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania")
-toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq")
-toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia")
-toPolyglossia ("de":"1901":_) = ("german", "spelling=old")
-toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old")
-toPolyglossia ("de":"AT":_) = ("german", "variant=austrian")
-toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old")
-toPolyglossia ("de":"CH":_) = ("german", "variant=swiss")
-toPolyglossia ("de":_) = ("german", "")
-toPolyglossia ("dsb":_) = ("lsorbian", "")
-toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly")
-toPolyglossia ("en":"AU":_) = ("english", "variant=australian")
-toPolyglossia ("en":"CA":_) = ("english", "variant=canadian")
-toPolyglossia ("en":"GB":_) = ("english", "variant=british")
-toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand")
-toPolyglossia ("en":"UK":_) = ("english", "variant=british")
-toPolyglossia ("en":"US":_) = ("english", "variant=american")
-toPolyglossia ("grc":_) = ("greek", "variant=ancient")
-toPolyglossia ("hsb":_) = ("usorbian", "")
-toPolyglossia ("la":"x":"classic":_) = ("latin", "variant=classic")
-toPolyglossia ("sl":_) = ("slovenian", "")
-toPolyglossia x = (commonFromBcp47 x, "")
+toPolyglossia :: Lang -> (String, String)
+toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars)
+ | "1901" `elem` vars = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ "AT" vars)
+ | "1901" `elem` vars = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ "CH" vars)
+ | "1901" `elem` vars = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _) = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "")
+toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly")
+toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient")
+toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars)
+ | "x-classic" `elem` vars = ("latin", "variant=classic")
+toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "")
+toPolyglossia x = (commonFromBcp47 x, "")
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Babel language string.
-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
-- List of supported languages (slightly outdated):
-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
-toBabel :: [String] -> String
-toBabel ("de":"1901":_) = "german"
-toBabel ("de":"AT":"1901":_) = "austrian"
-toBabel ("de":"AT":_) = "naustrian"
-toBabel ("de":"CH":"1901":_) = "swissgerman"
-toBabel ("de":"CH":_) = "nswissgerman"
-toBabel ("de":_) = "ngerman"
-toBabel ("dsb":_) = "lowersorbian"
-toBabel ("el":"polyton":_) = "polutonikogreek"
-toBabel ("en":"AU":_) = "australian"
-toBabel ("en":"CA":_) = "canadian"
-toBabel ("en":"GB":_) = "british"
-toBabel ("en":"NZ":_) = "newzealand"
-toBabel ("en":"UK":_) = "british"
-toBabel ("en":"US":_) = "american"
-toBabel ("fr":"CA":_) = "canadien"
-toBabel ("fra":"aca":_) = "acadian"
-toBabel ("grc":_) = "polutonikogreek"
-toBabel ("hsb":_) = "uppersorbian"
-toBabel ("la":"x":"classic":_) = "classiclatin"
-toBabel ("sl":_) = "slovene"
-toBabel x = commonFromBcp47 x
+toBabel :: Lang -> String
+toBabel (Lang "de" _ "AT" vars)
+ | "1901" `elem` vars = "austrian"
+ | otherwise = "naustrian"
+toBabel (Lang "de" _ "CH" vars)
+ | "1901" `elem` vars = "swissgerman"
+ | otherwise = "nswissgerman"
+toBabel (Lang "de" _ _ vars)
+ | "1901" `elem` vars = "german"
+ | otherwise = "ngerman"
+toBabel (Lang "dsb" _ _ _) = "lowersorbian"
+toBabel (Lang "el" _ _ vars)
+ | "polyton" `elem` vars = "polutonikogreek"
+toBabel (Lang "en" _ "AU" _) = "australian"
+toBabel (Lang "en" _ "CA" _) = "canadian"
+toBabel (Lang "en" _ "GB" _) = "british"
+toBabel (Lang "en" _ "NZ" _) = "newzealand"
+toBabel (Lang "en" _ "UK" _) = "british"
+toBabel (Lang "en" _ "US" _) = "american"
+toBabel (Lang "fr" _ "CA" _) = "canadien"
+toBabel (Lang "fra" _ _ vars)
+ | "aca" `elem` vars = "acadian"
+toBabel (Lang "grc" _ _ _) = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _) = "uppersorbian"
+toBabel (Lang "la" _ _ vars)
+ | "x-classic" `elem` vars = "classiclatin"
+toBabel (Lang "sl" _ _ _) = "slovene"
+toBabel x = commonFromBcp47 x
-- Takes a list of the constituents of a BCP 47 language code
-- and converts it to a string shared by Babel and Polyglossia.
-- https://tools.ietf.org/html/bcp47#section-2.1
-commonFromBcp47 :: [String] -> String
-commonFromBcp47 [] = ""
-commonFromBcp47 ("pt":"BR":_) = "brazil"
+commonFromBcp47 :: Lang -> String
+commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil"
-- Note: documentation says "brazilian" works too, but it doesn't seem to work
-- on some systems. See #2953.
-commonFromBcp47 ("sr":"Cyrl":_) = "serbianc"
-commonFromBcp47 ("zh":"Latn":"pinyin":_) = "pinyin"
-commonFromBcp47 x = fromIso $ head x
+commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc"
+commonFromBcp47 (Lang "zh" "Latn" _ vars)
+ | "pinyin" `elem` vars = "pinyin"
+commonFromBcp47 (Lang l _ _ _) = fromIso l
where
fromIso "af" = "afrikaans"
fromIso "am" = "amharic"
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 0fc6afbdc..4e756c419 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to groff man page format.
-}
module Text.Pandoc.Writers.Man ( writeMan) where
import Control.Monad.Except (throwError)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.List (intercalate, intersperse, stripPrefix, sort)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
@@ -110,7 +110,7 @@ pandocToMan opts (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return man representation of notes.
notesToMan :: PandocMonad m => WriterOptions -> [[Block]] -> StateT WriterState m Doc
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 3ac677943..1e0d8bde2 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -35,7 +35,7 @@ Markdown: <http://daringfireball.net/projects/markdown/>
-}
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (chr, isPunctuation, isSpace, ord)
import Data.Default
import qualified Data.HashMap.Strict as H
@@ -209,8 +209,8 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
Nothing -> empty
let headerBlocks = filter isHeaderBlock blocks
toc <- if writerTableOfContents opts
- then tableOfContents opts headerBlocks
- else return empty
+ then render' <$> tableOfContents opts headerBlocks
+ else return ""
-- Strip off final 'references' header if markdown citations enabled
let blocks' = if isEnabled Ext_citations opts
then case reverse blocks of
@@ -220,7 +220,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
body <- blockListToMarkdown opts blocks'
notesAndRefs' <- notesAndRefs opts
let main = render' $ body <> notesAndRefs'
- let context = defField "toc" (render' toc)
+ let context = -- for backwards compatibility we populate toc
+ -- with the contents of the toc, rather than a
+ -- boolean:
+ defField "toc" toc
+ $ defField "table-of-contents" toc
$ defField "body" main
$ (if isNullMeta meta
then id
@@ -228,7 +232,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
$ addVariablesToJSON opts metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return markdown representation of reference key table.
refsToMarkdown :: PandocMonad m => WriterOptions -> Refs -> MD m Doc
@@ -412,6 +416,9 @@ blockToMarkdown' opts (Plain inlines) = do
'+':s:_ | not isPlain && isSpace s -> "\\" <> contents
'*':s:_ | not isPlain && isSpace s -> "\\" <> contents
'-':s:_ | not isPlain && isSpace s -> "\\" <> contents
+ '+':[] | not isPlain -> "\\" <> contents
+ '*':[] | not isPlain -> "\\" <> contents
+ '-':[] | not isPlain -> "\\" <> contents
'|':_ | (isEnabled Ext_line_blocks opts ||
isEnabled Ext_pipe_tables opts)
&& isEnabled Ext_all_symbols_escapable opts
@@ -433,8 +440,10 @@ blockToMarkdown' opts (LineBlock lns) =
return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline
else blockToMarkdown opts $ linesToPara lns
blockToMarkdown' opts b@(RawBlock f str)
- | f == "markdown" = return $ text str <> text "\n"
- | f == "html" && isEnabled Ext_raw_html opts = do
+ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+ "markdown_mmd", "markdown_strict"]
+ = return $ text str <> text "\n"
+ | f `elem` ["html", "html5", "html4"] && isEnabled Ext_raw_html opts = do
plain <- asks envPlain
return $ if plain
then empty
@@ -1053,10 +1062,12 @@ inlineToMarkdown opts (Math DisplayMath str) =
(texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
inlineToMarkdown opts il@(RawInline f str) = do
plain <- asks envPlain
- if not plain &&
- ( f == "markdown" ||
+ if (plain && f == "plain") || (not plain &&
+ ( f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+ "markdown_mmd", "markdown_strict"] ||
(isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) ||
- (isEnabled Ext_raw_html opts && f == "html") )
+ (isEnabled Ext_raw_html opts && f `elem` ["html", "html4", "html5"])
+ ))
then return $ text str
else do
report $ InlineNotRendered il
diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs
index 104d3c20b..58252d60f 100644
--- a/src/Text/Pandoc/Writers/Math.hs
+++ b/src/Text/Pandoc/Writers/Math.hs
@@ -1,6 +1,8 @@
module Text.Pandoc.Writers.Math
( texMathToInlines
, convertMath
+ , defaultMathJaxURL
+ , defaultKaTeXURL
)
where
@@ -47,3 +49,8 @@ convertMath writer mt str = do
DisplayMath -> DisplayBlock
InlineMath -> DisplayInline
+defaultMathJaxURL :: String
+defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/"
+
+defaultKaTeXURL :: String
+defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index c70e5b786..58d1b0707 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -31,7 +31,7 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
-}
module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.List (intercalate)
import qualified Data.Set as Set
import Data.Text (Text, pack)
@@ -82,9 +82,8 @@ pandocToMediaWiki (Pandoc meta blocks) = do
let main = body ++ notes
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts) metadata
- return $ pack
- $ case writerTemplate opts of
- Nothing -> main
+ pack <$> case writerTemplate opts of
+ Nothing -> return main
Just tpl -> renderTemplate' tpl context
-- | Escape special characters for MediaWiki.
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index c5c3d9f5b..493da1545 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -52,7 +52,7 @@ import Text.Pandoc.Pretty
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.ImageSize
import Text.Pandoc.Logging
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char ( isLower, isUpper, toUpper )
import Text.TeXMath (writeEqn)
import System.FilePath (takeExtension)
@@ -125,7 +125,7 @@ pandocToMs opts (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Association list of characters to escape.
msEscapes :: Map.Map Char String
@@ -411,7 +411,8 @@ definitionListItemToMs opts (label, defs) = do
let (first, rest) = case blocks of
((Para x):y) -> (Plain x,y)
(x:y) -> (x,y)
- [] -> error "blocks is null"
+ [] -> (Plain [], [])
+ -- should not happen
rest' <- liftM vcat $
mapM (\item -> blockToMs opts item) rest
first' <- blockToMs opts first
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index 85e0b5467..0383d9d86 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -42,7 +42,7 @@ However, @\<literal style="html">@ tag is used for HTML raw blocks
even though it is supported only in Emacs Muse.
-}
module Text.Pandoc.Writers.Muse (writeMuse) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Text (Text)
import Data.List (intersperse, transpose, isInfixOf)
import System.FilePath (takeExtension)
@@ -97,11 +97,17 @@ pandocToMuse (Pandoc meta blocks) = do
body <- blockListToMuse blocks
notes <- liftM (reverse . stNotes) get >>= notesToMuse
let main = render colwidth $ body $+$ notes
- let context = defField "body" main
- $ metadata
+ let context = defField "body" main metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
+
+-- | Convert list of Pandoc block elements to Muse
+-- | without setting stTopLevel.
+flatBlockListToMuse :: PandocMonad m
+ => [Block] -- ^ List of block elements
+ -> StateT WriterState m Doc
+flatBlockListToMuse blocks = cat <$> mapM blockToMuse blocks
-- | Convert list of Pandoc block elements to Muse.
blockListToMuse :: PandocMonad m
@@ -112,11 +118,11 @@ blockListToMuse blocks = do
modify $ \s -> s { stTopLevel = not $ stInsideBlock s
, stInsideBlock = True
}
- contents <- mapM blockToMuse blocks
+ result <- flatBlockListToMuse blocks
modify $ \s -> s { stTopLevel = stTopLevel oldState
, stInsideBlock = stInsideBlock oldState
}
- return $ cat contents
+ return result
-- | Convert Pandoc block element to Muse.
blockToMuse :: PandocMonad m
@@ -129,23 +135,23 @@ blockToMuse (Para inlines) = do
blockToMuse (LineBlock lns) = do
let splitStanza [] = []
splitStanza xs = case break (== mempty) xs of
- (l, []) -> l : []
+ (l, []) -> [l]
(l, _:r) -> l : splitStanza r
let joinWithLinefeeds = nowrap . mconcat . intersperse cr
let joinWithBlankLines = mconcat . intersperse blankline
let prettyfyStanza ls = joinWithLinefeeds <$> mapM inlineListToMuse ls
contents <- joinWithBlankLines <$> mapM prettyfyStanza (splitStanza lns)
return $ blankline $$ "<verse>" $$ contents $$ "</verse>" <> blankline
-blockToMuse (CodeBlock (_,_,_) str) = do
+blockToMuse (CodeBlock (_,_,_) str) =
return $ "<example>" $$ text str $$ "</example>" $$ blankline
blockToMuse (RawBlock (Format format) str) =
return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$
text str $$ "</literal>" $$ blankline
blockToMuse (BlockQuote blocks) = do
- contents <- blockListToMuse blocks
+ contents <- flatBlockListToMuse blocks
return $ blankline
<> "<quote>"
- $$ flush contents -- flush to drop blanklines
+ $$ nest 0 contents -- nest 0 to remove trailing blank lines
$$ "</quote>"
<> blankline
blockToMuse (OrderedList (start, style, _) items) = do
@@ -154,11 +160,10 @@ blockToMuse (OrderedList (start, style, _) items) = do
let maxMarkerLength = maximum $ map length markers
let markers' = map (\m -> let s = maxMarkerLength - length m
in m ++ replicate s ' ') markers
- contents <- mapM (\(item, num) -> orderedListItemToMuse item num) $
- zip markers' items
+ contents <- zipWithM orderedListItemToMuse markers' items
-- ensure that sublists have preceding blank line
topLevel <- gets stTopLevel
- return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline
+ return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline
where orderedListItemToMuse :: PandocMonad m
=> String -- ^ marker for list item
-> [Block] -- ^ list item (list of blocks)
@@ -170,7 +175,7 @@ blockToMuse (BulletList items) = do
contents <- mapM bulletListItemToMuse items
-- ensure that sublists have preceding blank line
topLevel <- gets stTopLevel
- return $ cr $$ ((if topLevel then nest 1 else id) $ vcat contents) $$ blankline
+ return $ cr $$ (if topLevel then nest 1 else id) (vcat contents) $$ blankline
where bulletListItemToMuse :: PandocMonad m
=> [Block]
-> StateT WriterState m Doc
@@ -179,7 +184,7 @@ blockToMuse (BulletList items) = do
return $ hang 2 "- " contents
blockToMuse (DefinitionList items) = do
contents <- mapM definitionListItemToMuse items
- return $ cr $$ (nest 1 $ vcat $ contents) $$ blankline
+ return $ cr $$ nest 1 (vcat contents) $$ blankline
where definitionListItemToMuse :: PandocMonad m
=> ([Inline], [[Block]])
-> StateT WriterState m Doc
@@ -218,8 +223,8 @@ blockToMuse (Table caption _ _ headers rows) = do
-- FIXME: Muse doesn't allow blocks with height more than 1.
let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks
where h = maximum (1 : map height blocks)
- sep' = lblock (length sep) $ vcat (map text $ replicate h sep)
- let makeRow sep = (" " <>) . (hpipeBlocks sep . zipWith lblock widthsInChars)
+ sep' = lblock (length sep) $ vcat (replicate h (text sep))
+ let makeRow sep = (" " <>) . hpipeBlocks sep . zipWith lblock widthsInChars
let head' = makeRow " || " headers'
let rowSeparator = if noHeaders then " | " else " | "
rows'' <- mapM (\row -> do cols <- mapM blockListToMuse row
@@ -236,9 +241,7 @@ blockToMuse Null = return empty
notesToMuse :: PandocMonad m
=> Notes
-> StateT WriterState m Doc
-notesToMuse notes =
- mapM (\(num, note) -> noteToMuse num note) (zip [1..] notes) >>=
- return . vsep
+notesToMuse notes = liftM vsep (zipWithM noteToMuse [1 ..] notes)
-- | Return Muse representation of a note.
noteToMuse :: PandocMonad m
@@ -268,7 +271,7 @@ conditionalEscapeString s
inlineListToMuse :: PandocMonad m
=> [Inline]
-> StateT WriterState m Doc
-inlineListToMuse lst = mapM inlineToMuse lst >>= return . hcat
+inlineListToMuse lst = liftM hcat (mapM inlineToMuse lst)
-- | Convert Pandoc inline element to Muse.
inlineToMuse :: PandocMonad m
@@ -316,7 +319,7 @@ inlineToMuse Space = return space
inlineToMuse SoftBreak = do
wrapText <- gets $ writerWrapText . stOptions
return $ if wrapText == WrapPreserve then cr else space
-inlineToMuse (Link _ txt (src, _)) = do
+inlineToMuse (Link _ txt (src, _)) =
case txt of
[Str x] | escapeURI x == src ->
return $ "[[" <> text (escapeLink x) <> "]]"
@@ -340,7 +343,7 @@ inlineToMuse (Note contents) = do
-- add to notes in state
notes <- gets stNotes
modify $ \st -> st { stNotes = contents:notes }
- let ref = show $ (length notes) + 1
+ let ref = show $ length notes + 1
return $ "[" <> text ref <> "]"
inlineToMuse (Span (_,name:_,_) inlines) = do
contents <- inlineListToMuse inlines
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 1da051380..785891a9f 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -31,8 +31,9 @@ Conversion of 'Pandoc' documents to ODT.
module Text.Pandoc.Writers.ODT ( writeODT ) where
import Codec.Archive.Zip
import Control.Monad.Except (catchError)
-import Control.Monad.State
+import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
+import Data.Generics (everywhere', mkT)
import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe)
import qualified Data.Text.Lazy as TL
@@ -46,13 +47,14 @@ import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
import Text.Pandoc.Pretty
import Text.Pandoc.Shared (stringify)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
import Text.Pandoc.Walk
import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
import Text.Pandoc.Writers.Shared (fixDisplayMath)
+import Text.Pandoc.BCP47 (getLang, toLang, Lang(..), renderLang)
import Text.Pandoc.XML
import Text.TeXMath
-import Text.XML.Light.Output
+import Text.XML.Light
data ODTState = ODTState { stEntries :: [Entry]
}
@@ -78,6 +80,7 @@ pandocToODT :: PandocMonad m
pandocToODT opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
let title = docTitle meta
+ lang <- toLang (getLang opts meta)
refArchive <-
case writerReferenceDoc opts of
Just f -> liftM toArchive $ lift $ P.readFileLazy f
@@ -132,18 +135,49 @@ pandocToODT opts doc@(Pandoc meta _) = do
,("xmlns:ooo","http://openoffice.org/2004/office")
,("xmlns:grddl","http://www.w3.org/2003/g/data-view#")
,("office:version","1.2")]
- $ ( inTagsSimple "office:meta"
- $ ( inTagsSimple "dc:title" (text $ escapeStringForXML (stringify title))
- )
+ $ ( inTagsSimple "office:meta" $
+ ( inTagsSimple "dc:title"
+ (text $ escapeStringForXML (stringify title))
+ $$
+ case lang of
+ Just l -> inTagsSimple "dc:language"
+ (text (escapeStringForXML (renderLang l)))
+ Nothing -> empty
+ )
)
)
-- make sure mimetype is first
let mimetypeEntry = toEntry "mimetype" epochtime
$ fromStringLazy "application/vnd.oasis.opendocument.text"
- let archive'' = addEntryToArchive mimetypeEntry
+ archive'' <- updateStyleWithLang lang
+ $ addEntryToArchive mimetypeEntry
$ addEntryToArchive metaEntry archive'
return $ fromArchive archive''
+updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
+updateStyleWithLang Nothing arch = return arch
+updateStyleWithLang (Just lang) arch = do
+ epochtime <- floor `fmap` (lift P.getPOSIXTime)
+ return arch{ zEntries = [if eRelativePath e == "styles.xml"
+ then case parseXMLDoc
+ (toStringLazy (fromEntry e)) of
+ Nothing -> e
+ Just d ->
+ toEntry "styles.xml" epochtime
+ ( fromStringLazy
+ . ppTopElement
+ . addLang lang $ d )
+ else e
+ | e <- zEntries arch] }
+
+addLang :: Lang -> Element -> Element
+addLang lang = everywhere' (mkT updateLangAttr)
+ where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
+ = Attr n (langLanguage lang)
+ updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
+ = Attr n (langRegion lang)
+ updateLangAttr x = x
+
-- | transform both Image and Math elements
transformPicMath :: PandocMonad m => WriterOptions ->Inline -> O m Inline
transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 4a0a317fa..52577ac17 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -60,9 +60,9 @@ writeOPML opts (Pandoc meta blocks) = do
meta'
main <- (render colwidth . vcat) <$> (mapM (elementToOPML opts) elements)
let context = defField "body" main metadata
- return $ case writerTemplate opts of
- Nothing -> main
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
writeHtmlInlines :: PandocMonad m => [Inline] -> m Text
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 58295684e..ed3dabb87 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -33,7 +33,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML.
-}
module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
import Control.Arrow ((***), (>>>))
-import Control.Monad.State hiding (when)
+import Control.Monad.State.Strict hiding (when)
import Data.Char (chr)
import Data.List (sortBy)
import Data.Text (Text)
@@ -50,6 +50,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML
+import Text.Pandoc.BCP47 (parseBCP47, Lang(..))
import Text.Printf (printf)
-- | Auxiliary function to convert Plain block to Para.
@@ -168,8 +169,8 @@ inTextStyle d = do
inTags False "style:style"
[("style:name", styleName)
,("style:family", "text")]
- $ selfClosingTag "style:text-properties"
- (concatMap textStyleAttr (Set.toList at)))
+ $ selfClosingTag "style:text-properties"
+ (concatMap textStyleAttr (Set.toList at)))
return $ inTags False
"text:span" [("text:style-name",styleName)] d
@@ -219,11 +220,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do
let listStyles = map listStyle (stListStyles s)
let automaticStyles = vcat $ reverse $ styles ++ listStyles
let context = defField "body" body
+ $ defField "toc" (writerTableOfContents opts)
$ defField "automatic-styles" (render' automaticStyles)
$ metadata
- return $ case writerTemplate opts of
- Nothing -> body
- Just tpl -> renderTemplate' tpl context
+ case writerTemplate opts of
+ Nothing -> return body
+ Just tpl -> renderTemplate' tpl context
withParagraphStyle :: PandocMonad m
=> WriterOptions -> String -> [Block] -> OD m Doc
@@ -326,7 +328,8 @@ blockToOpenDocument o bs
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
| LineBlock b <- bs = blockToOpenDocument o $ linesToPara b
- | Div _ xs <- bs = blocksToOpenDocument o xs
+ | Div attr xs <- bs = withLangFromAttr attr
+ (blocksToOpenDocument o xs)
| Header i _ b <- bs = setFirstPara >>
(inHeaderTags i =<< inlinesToOpenDocument o b)
| BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
@@ -444,7 +447,7 @@ inlineToOpenDocument o ils
| writerWrapText o == WrapPreserve
-> return $ preformatted "\n"
| otherwise -> return $ space
- Span _ xs -> inlinesToOpenDocument o xs
+ Span attr xs -> withLangFromAttr attr (inlinesToOpenDocument o xs)
LineBreak -> return $ selfClosingTag "text:line-break" []
Str s -> return $ handleSpaces $ escapeStringForXML s
Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l
@@ -606,7 +609,14 @@ paraTableStyles t s (a:xs)
[ ("fo:text-align", x)
, ("style:justify-single-word", "false")]
-data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
+data TextStyle = Italic
+ | Bold
+ | Strike
+ | Sub
+ | Sup
+ | SmallC
+ | Pre
+ | Language Lang
deriving ( Eq,Ord )
textStyleAttr :: TextStyle -> [(String,String)]
@@ -624,4 +634,18 @@ textStyleAttr s
| Pre <- s = [("style:font-name" ,"Courier New")
,("style:font-name-asian" ,"Courier New")
,("style:font-name-complex" ,"Courier New")]
+ | Language lang <- s
+ = [("fo:language" ,langLanguage lang)
+ ,("fo:country" ,langRegion lang)]
| otherwise = []
+
+withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
+withLangFromAttr (_,_,kvs) action =
+ case lookup "lang" kvs of
+ Nothing -> action
+ Just l -> do
+ case parseBCP47 l of
+ Right lang -> withTextStyle (Language lang) action
+ Left _ -> do
+ report $ InvalidLang l
+ action
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index e8f48da00..48f17c4fb 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -35,7 +35,7 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode.
Org-Mode: <http://orgmode.org>
-}
module Text.Pandoc.Writers.Org (writeOrg) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (isAlphaNum, toLower)
import Data.Text (Text)
import Data.List (intersect, intersperse, isPrefixOf, partition, transpose)
@@ -86,7 +86,7 @@ pandocToOrg (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Return Org representation of notes.
notesToOrg :: PandocMonad m => [[Block]] -> Org m Doc
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 59f6553e2..019c8335d 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST ) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (isSpace, toLower)
import Data.List (isPrefixOf, stripPrefix)
import Data.Maybe (fromMaybe)
@@ -108,7 +108,7 @@ pandocToRST (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
where
normalizeHeadings lev (Header l a i:bs) =
Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 5c990f324..48d31c7bf 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -122,13 +122,18 @@ writeRTF options doc = do
let context = defField "body" body
$ defField "spacer" spacer
$ (if writerTableOfContents options
- then defField "toc" toc
+ then defField "table-of-contents" toc
+ -- for backwards compatibility,
+ -- we populate toc with the contents
+ -- of the toc rather than a boolean:
+ . defField "toc" toc
else id)
$ metadata
- return $ T.pack
- $ case writerTemplate options of
+ T.pack <$>
+ case writerTemplate options of
Just tpl -> renderTemplate' tpl context
- Nothing -> case reverse body of
+ Nothing -> return $
+ case reverse body of
('\n':_) -> body
_ -> body ++ "\n"
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index 27d26c7d9..26070966e 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -85,7 +85,7 @@ writeTEI opts (Pandoc meta blocks) = do
$ metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to TEI.
elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc
@@ -159,11 +159,13 @@ blockToTEI opts (Div (ident,_,_) [Para lst]) = do
let attribs = [("id", ident) | not (null ident)]
inTags False "p" attribs <$> inlinesToTEI opts lst
blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
-blockToTEI _ (Header _ _ _) = return empty
--- should not occur after hierarchicalize
+blockToTEI _ h@(Header _ _ _) = do
+ -- should not occur after hierarchicalize, except inside lists/blockquotes
+ report $ BlockNotRendered h
+ return empty
-- For TEI simple, text must be within containing block element, so
--- we use plainToPara to ensure that Plain text ends up contained by
--- something.
+-- we use treat as Para to ensure that Plain text ends up contained by
+-- something:
blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
-- title beginning with fig: indicates that the image is a figure
--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 387e55290..549d4f3d9 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -32,7 +32,7 @@ Conversion of 'Pandoc' format into Texinfo.
-}
module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Control.Monad.Except (throwError)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (chr, ord)
import Data.List (maximumBy, transpose)
import Data.Ord (comparing)
@@ -106,7 +106,7 @@ pandocToTexinfo options (Pandoc meta blocks) = do
$ metadata
case writerTemplate options of
Nothing -> return body
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
-- | Escape things as needed for Texinfo.
stringToTexinfo :: String -> String
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 091a5baca..acc9eaa0f 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to Textile markup.
Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
-}
module Text.Pandoc.Writers.Textile ( writeTextile ) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (isSpace)
import Data.List (intercalate)
import Data.Text (Text, pack)
@@ -75,7 +75,7 @@ pandocToTextile opts (Pandoc meta blocks) = do
let context = defField "body" main metadata
case writerTemplate opts of
Nothing -> return main
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
withUseTags :: PandocMonad m => TW m a -> TW m a
withUseTags action = do
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 5ee239e59..ced02d4be 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -33,7 +33,7 @@ http://zim-wiki.org/manual/Help/Wiki_Syntax.html
module Text.Pandoc.Writers.ZimWiki ( writeZimWiki ) where
import Control.Monad (zipWithM)
-import Control.Monad.State (StateT, evalStateT, gets, modify)
+import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
import Data.Default (Default (..))
import Data.List (intercalate, isInfixOf, isPrefixOf, transpose)
import qualified Data.Map as Map
@@ -78,7 +78,7 @@ pandocToZimWiki opts (Pandoc meta blocks) = do
$ defField "toc" (writerTableOfContents opts)
$ metadata
case writerTemplate opts of
- Just tpl -> return $ renderTemplate' tpl context
+ Just tpl -> renderTemplate' tpl context
Nothing -> return main
-- | Escape special characters for ZimWiki.