aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs7
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs2
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs2
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs20
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs4
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs2774
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs48
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs108
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs10
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs607
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs6
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs5
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs5
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs10
-rw-r--r--src/Text/Pandoc/Readers/RST.hs53
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs11
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs12
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs673
22 files changed, 3177 insertions, 1196 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index bd3c7c356..c1e4d742c 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,6 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Data.Char (toUpper)
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, crFilter)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
@@ -9,7 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Data.Either (rights)
import Data.Generics
import Data.Char (isSpace)
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
@@ -526,7 +526,8 @@ instance Default DBState where
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp
+ let tree = normalizeTree . parseXML . handleInstructions
+ $ T.unpack $ crFilter inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 2757314ab..21aa358f2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -76,7 +76,7 @@ module Text.Pandoc.Readers.Docx
import Codec.Archive.Zip
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index e6736100f..24615ba94 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -58,7 +58,7 @@ import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import Data.Char (chr, isDigit, ord, readLitChar)
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
index 38f976fd8..b32a73770 100644
--- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -7,7 +7,7 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
, hasStyleName
) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (toLower)
import qualified Data.Map as M
import Text.Pandoc.Readers.Docx.Util
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 94f933c4d..734973e33 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, addMetaField
- , escapeURI, safeRead )
+ , escapeURI, safeRead, crFilter )
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
Extension (Ext_epub_html_exts,
Ext_raw_html, Ext_native_divs, Ext_native_spans))
@@ -53,6 +53,7 @@ import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import qualified Data.Map as M
+import Data.Foldable ( for_ )
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isPrefixOf )
import Data.Char ( isDigit, isLetter, isAlphaNum )
@@ -71,7 +72,7 @@ import Data.Monoid ((<>))
import Text.Parsec.Error
import qualified Data.Set as Set
import Text.Pandoc.Error
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Control.Monad.Except (throwError)
-- | Convert HTML-formatted string to 'Pandoc' document.
@@ -82,7 +83,7 @@ readHtml :: PandocMonad m
readHtml opts inp = do
let tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- inp
+ (crFilter inp)
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -134,6 +135,13 @@ type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m)
type TagParser m = HTMLParser m [Tag Text]
+pHtml :: PandocMonad m => TagParser m Blocks
+pHtml = try $ do
+ (TagOpen "html" attr) <- lookAhead $ pAnyTag
+ for_ (lookup "lang" attr) $
+ updateState . B.setMeta "lang" . B.text . T.unpack
+ pInTags "html" block
+
pBody :: PandocMonad m => TagParser m Blocks
pBody = pInTags "body" block
@@ -162,7 +170,6 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
block :: PandocMonad m => TagParser m Blocks
block = do
- pos <- getPosition
res <- choice
[ eSection
, eSwitch B.para block
@@ -176,13 +183,14 @@ block = do
, pList
, pHrule
, pTable
+ , pHtml
, pHead
, pBody
, pDiv
, pPlain
, pRawHtmlBlock
]
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
@@ -797,6 +805,8 @@ pCloses tagtype = try $ do
(TagClose "dl") | tagtype == "dd" -> return ()
(TagClose "table") | tagtype == "td" -> return ()
(TagClose "table") | tagtype == "tr" -> return ()
+ (TagClose t') | tagtype == "p" && t' `Set.member` blockHtmlTags
+ -> return () -- see #3794
_ -> mzero
pTagText :: PandocMonad m => TagParser m Inlines
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index b22b71b96..a09ed8be9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -27,7 +27,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (splitBy, trim)
+import Text.Pandoc.Shared (splitBy, trim, crFilter)
-- | Parse Haddock markup and return a 'Pandoc' document.
@@ -35,7 +35,7 @@ readHaddock :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack s) of
+readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 17fb48548..a9bafb03b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
@@ -28,20 +31,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Portability : portable
Conversion of LaTeX to 'Pandoc' document.
+
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ applyMacros,
rawLaTeXInline,
rawLaTeXBlock,
- inlineCommand,
+ macro,
+ inlineCommand
) where
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (chr, isAlphaNum, isLetter, ord)
-import Data.Text (Text, unpack)
+import Data.Char (chr, isAlphaNum, isLetter, ord, isDigit)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe (fromMaybe, maybeToList)
import Safe (minimumDef)
import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -52,10 +61,19 @@ import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (many, mathDisplay, mathInline, optional,
- space, (<|>))
+import Text.Pandoc.Parsing hiding (many, optional, withRaw,
+ mathInline, mathDisplay,
+ space, (<|>), spaces, blankline)
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX.Types (Macro(..), Tok(..),
+ TokType(..))
import Text.Pandoc.Walk
+import Text.Pandoc.Error (PandocError(PandocParsecError, PandocMacroLoop))
+
+-- for debugging:
+-- import Text.Pandoc.Extensions (getDefaultExtensions)
+-- import Text.Pandoc.Class (runIOorExplode, PandocIO)
+-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
@@ -63,17 +81,18 @@ readLaTeX :: PandocMonad m
-> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readLaTeX opts ltx = do
- parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx)
+ parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
+ (tokenize (crFilter ltx))
case parsed of
Right result -> return result
- Left e -> throwError e
+ Left e -> throwError $ PandocParsecError (T.unpack ltx) e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
bs <- blocks
eof
st <- getState
- let meta = stateMeta st
+ let meta = sMeta st
let doc' = doc bs
let headerLevel (Header n _ _) = [n]
headerLevel _ = []
@@ -87,177 +106,476 @@ parseLaTeX = do
else id) doc'
return $ Pandoc meta bs'
-type LP m = ParserT String ParserState m
-
-anyControlSeq :: PandocMonad m => LP m String
-anyControlSeq = do
- char '\\'
- next <- option '\n' anyChar
- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
-
-controlSeq :: PandocMonad m => String -> LP m String
-controlSeq name = try $ do
- char '\\'
- case name of
- "" -> mzero
- [c] | not (isLetter c) -> string [c]
- cs -> string cs <* notFollowedBy letter <* optional sp
- return name
-
-dimenarg :: PandocMonad m => LP m String
-dimenarg = try $ do
- ch <- option "" $ string "="
- num <- many1 digit
- dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- return $ ch ++ num ++ dim
+-- testParser :: LP PandocIO a -> Text -> IO a
+-- testParser p t = do
+-- res <- runIOorExplode (runParserT p defaultLaTeXState{
+-- sOptions = def{ readerExtensions =
+-- enableExtension Ext_raw_tex $
+-- getDefaultExtensions "latex" }} "source" (tokenize t))
+-- case res of
+-- Left e -> error (show e)
+-- Right r -> return r
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: Maybe Inlines
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = Nothing
+ , sInListItem = False
+ , sInTableCell = False
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXBlock = do
+ lookAhead (try (char '\\' >> letter))
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- try $
+ withRaw (environment <|> macroDef <|> blockCommand)
+ return raw
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right raw -> takeP (T.length (untokenize raw))
+
+macro :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m Blocks
+macro = do
+ guardEnabled Ext_latex_macros
+ lookAhead (char '\\' *> oneOfStrings ["new", "renew", "provide"] *>
+ oneOfStrings ["command", "environment"])
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawblock = do
+ (_, raw) <- withRaw $ try macroDef
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawblock lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, st) -> do
+ updateState (updateMacros (const $ sMacros st))
+ mempty <$ takeP (T.length (untokenize raw))
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = do
+ (guardEnabled Ext_latex_macros >>
+ do let retokenize = doMacros 0 *> (toksToString <$> getInput)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s') <|> return s
+
+rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => ParserT String s m String
+rawLaTeXInline = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (_, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ takeP (T.length (untokenize raw))
+
+inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand = do
+ lookAhead (try (char '\\' >> letter) <|> char '$')
+ inp <- getInput
+ let toks = tokenize $ T.pack inp
+ let rawinline = do
+ (il, raw) <- try $ withRaw (inlineEnvironment <|> inlineCommand')
+ st <- getState
+ return (il, raw, st)
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT rawinline lstate "source" toks
+ case res of
+ Left _ -> mzero
+ Right (il, raw, s) -> do
+ updateState $ updateMacros (const $ sMacros s)
+ takeP (T.length (untokenize raw))
+ return il
+
+tokenize :: Text -> [Tok]
+tokenize = totoks (1, 1)
+
+totoks :: (Line, Column) -> Text -> [Tok]
+totoks (lin,col) t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok (lin, col) Newline "\n"
+ : totoks (lin + 1,1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok (lin, col) Spaces sps
+ : totoks (lin, col + T.length sps) rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok (lin, col) Word ws
+ : totoks (lin, col + T.length ws) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok (lin, col) Comment ("%" <> cs)
+ : totoks (lin, col + 1 + T.length cs) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok (lin, col) Symbol (T.singleton c)]
+ Just (d, rest')
+ | isLetter d ->
+ let (ws, rest'') = T.span isLetter rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok (lin, col) (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (lin,
+ col + 1 + T.length ws + T.length ss) rest'''
+ | d == '\t' || d == '\n' ->
+ Tok (lin, col) Symbol ("\\")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (lin, col + 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok (lin, col) (Arg i) ("#" <> t1)
+ : totoks (lin, col + 1 + T.length t1) t2
+ Nothing ->
+ Tok (lin, col) Symbol ("#")
+ : totoks (lin, col + 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok (lin, col) Esc2 (T.pack ['^','^',d,e])
+ : totoks (lin, col + 4) rest'''
+ _ ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ | d < '\128' ->
+ Tok (lin, col) Esc1 (T.pack ['^','^',d])
+ : totoks (lin, col + 3) rest''
+ _ -> [Tok (lin, col) Symbol ("^"),
+ Tok (lin, col + 1) Symbol ("^")]
+ _ -> Tok (lin, col) Symbol ("^")
+ : totoks (lin, col + 1) rest
+ | otherwise ->
+ Tok (lin, col) Symbol (T.singleton c) : totoks (lin, col + 1) rest
+
+ where isSpaceOrTab ' ' = True
+ isSpaceOrTab '\t' = True
+ isSpaceOrTab _ = False
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos spos _ (Tok (lin,col) _ _ : _) =
+ setSourceColumn (setSourceLine spos lin) col
+ updatePos spos _ [] = spos
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ when (not verbatimMode) $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro numargs optarg newtoks) -> do
+ setInput ts
+ let getarg = spaces >> braced
+ args <- case optarg of
+ Nothing -> count numargs getarg
+ Just o ->
+ (:) <$> option o bracketedToks
+ <*> count (numargs - 1) getarg
+ let addTok (Tok _ (Arg i) _) acc | i > 0
+ , i <= numargs =
+ map (setpos spos) (args !! (i - 1)) ++ acc
+ addTok t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr addTok ts' newtoks
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+
+setpos :: (Line, Column) -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+ where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+ isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSym
+ where isSym (Tok _ Symbol _) = True
+ isSym _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline
whitespace :: PandocMonad m => LP m ()
-whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
-
-endline :: PandocMonad m => LP m ()
-endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)
+whitespace = () <$ satisfyTok isSpaceTok
+ where isSpaceTok (Tok _ Spaces _) = True
+ isSpaceTok _ = False
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
-tildeEscape :: PandocMonad m => LP m Char
-tildeEscape = try $ do
- string "^^"
- c <- satisfy (\x -> x >= '\0' && x <= '\128')
- d <- if isLowerHex c
- then option "" $ count 1 (satisfy isLowerHex)
- else return ""
- if null d
- then case ord c of
- x | x >= 64 && x <= 127 -> return $ chr (x - 64)
- | otherwise -> return $ chr (x + 64)
- else return $ chr $ read ('0':'x':c:d)
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
comment :: PandocMonad m => LP m ()
-comment = do
- char '%'
- skipMany (satisfy (/='\n'))
- optional newline
- return ()
+comment = () <$ satisfyTok isCommentTok
+ where isCommentTok (Tok _ Comment _) = True
+ isCommentTok _ = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
-bgroup :: PandocMonad m => LP m ()
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
- skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
- () <$ char '{'
- <|> () <$ controlSeq "bgroup"
- <|> () <$ controlSeq "begingroup"
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-egroup :: PandocMonad m => LP m ()
-egroup = () <$ char '}'
- <|> () <$ controlSeq "egroup"
- <|> () <$ controlSeq "endgroup"
+egroup :: PandocMonad m => LP m Tok
+egroup = (symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup")
-grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
bgroup
-- first we check for an inner 'grouped', because
-- {{a,b}} should be parsed the same as {a,b}
- try (grouped parser <* egroup)
- <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m String
-braced = grouped chunk
- where chunk =
- many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
- <|> try (string "\\}")
- <|> try (string "\\{")
- <|> try (string "\\\\")
- <|> ((\x -> "{" ++ x ++ "}") <$> braced)
- <|> count 1 anyChar
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' 1
+ where braced' (n :: Int) =
+ handleEgroup n <|> handleBgroup n <|> handleOther n
+ handleEgroup n = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' (n - 1)
+ handleBgroup n = do
+ t <- bgroup
+ (t:) <$> braced' (n + 1)
+ handleOther n = do
+ t <- anyTok
+ (t:) <$> braced' n
bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
-
-mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
-mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
-
-mathInline :: PandocMonad m => LP m String -> LP m Inlines
-mathInline p = math <$> (try p >>= applyMacros')
-
-mathChars :: PandocMonad m => LP m String
-mathChars =
- concat <$> many (escapedChar
- <|> (snd <$> withRaw braced)
- <|> many1 (satisfy isOrdChar))
- where escapedChar = try $ do char '\\'
- c <- anyChar
- return ['\\',c]
- isOrdChar '$' = False
- isOrdChar '{' = False
- isOrdChar '}' = False
- isOrdChar '\\' = False
- isOrdChar _ = True
-
-quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
-quoted' f starter ender = do
- startchs <- starter
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then do
- ils <- many (notFollowedBy ender >> inline)
- (ender >> return (f (mconcat ils))) <|>
- (<> mconcat ils) <$>
- lit (case startchs of
- "``" -> "“"
- "`" -> "‘"
- _ -> startchs)
- else lit startchs
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
-doubleQuote :: PandocMonad m => LP m Inlines
-doubleQuote = do
- quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
- <|> quoted' doubleQuoted (string "“") (void $ char '”')
- -- the following is used by babel for localized quotes:
- <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
- <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ (T.take 2 (T.reverse s)) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
-singleQuote :: PandocMonad m => LP m Inlines
-singleQuote = do
- smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if smart
- then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
- <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- else str <$> many1 (oneOf "`\'‘’")
+-- inline elements:
-inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> inlineText
- <|> inlineCommand
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (char '-' *> option (str "-")
- (char '-' *> option (str "–") (str "—" <$ char '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (string "''"))
- <|> (str "”" <$ char '”')
- <|> (str "’" <$ char '\'')
- <|> (str "’" <$ char '’')
- <|> (str "\160" <$ char '~')
- <|> mathDisplay (string "$$" *> mathChars <* string "$$")
- <|> mathInline (char '$' *> mathChars <* char '$')
- <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
- <|> (str . (:[]) <$> tildeEscape)
- <|> (do res <- oneOf "#&~^'`\"[]"
- pos <- getPosition
- report $ ParsingUnescaped [res] pos
- return $ str [res])
+word :: PandocMonad m => LP m Inlines
+word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
-inlines :: PandocMonad m => LP m Inlines
-inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+regularSymbol :: PandocMonad m => LP m Inlines
+regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+ where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
+ isRegularSymbol _ = False
+ isSpecial c = c `Set.member` specialChars
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
@@ -268,467 +586,19 @@ inlineGroup = do
-- we need the span so we can detitlecase bibtex entries;
-- we need to know when something is {C}apitalized
-block :: PandocMonad m => LP m Blocks
-block = (mempty <$ comment)
- <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
- <|> environment
- <|> include
- <|> macro
- <|> blockCommand
- <|> paragraph
- <|> grouped block
-
-blocks :: PandocMonad m => LP m Blocks
-blocks = mconcat <$> many block
-
-getRawCommand :: PandocMonad m => String -> LP m String
-getRawCommand name' = do
- rawargs <- withRaw (many (try (optional sp *> opt)) *>
- option "" (try (optional sp *> dimenarg)) *>
- many braced)
- return $ '\\' : name' ++ snd rawargs
-
-lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
-lookupListDefault d = (fromMaybe d .) . lookupList
- where
- lookupList l m = msum $ map (`M.lookup` m) l
-
-blockCommand :: PandocMonad m => LP m Blocks
-blockCommand = try $ do
- name <- anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*" <* optional sp)
- let name' = name ++ star
- let raw = do
- rawcommand <- getRawCommand name'
- transformed <- applyMacros' rawcommand
- guard $ transformed /= rawcommand
- notFollowedBy $ parseFromString' inlines transformed
- parseFromString' blocks transformed
- lookupListDefault raw [name',name] blockCommands
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
--- eat an optional argument and one or more arguments in braces
-ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
-ignoreInlines name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawInline "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-guardRaw :: PandocMonad m => LP m ()
-guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex
-
-optargs :: PandocMonad m => LP m String
-optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
-ignoreBlocks name = (name, p)
- where
- p = do oa <- optargs
- let rawCommand = '\\':name ++ oa
- let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
- doraw <|> ignore rawCommand
-
-blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
-blockCommands = M.fromList $
- [ ("par", mempty <$ skipopts)
- , ("parbox", braced >> grouped blocks)
- , ("title", mempty <$ (skipopts *>
- (grouped inline >>= addMeta "title")
- <|> (grouped block >>= addMeta "title")))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
- , ("author", mempty <$ (skipopts *> authors))
- -- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
- , ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- -- Koma-script metadata commands
- , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
- -- sectioning
- , ("part", section nullAttr (-1))
- , ("part*", section nullAttr (-1))
- , ("chapter", section nullAttr 0)
- , ("chapter*", section ("",["unnumbered"],[]) 0)
- , ("section", section nullAttr 1)
- , ("section*", section ("",["unnumbered"],[]) 1)
- , ("subsection", section nullAttr 2)
- , ("subsection*", section ("",["unnumbered"],[]) 2)
- , ("subsubsection", section nullAttr 3)
- , ("subsubsection*", section ("",["unnumbered"],[]) 3)
- , ("paragraph", section nullAttr 4)
- , ("paragraph*", section ("",["unnumbered"],[]) 4)
- , ("subparagraph", section nullAttr 5)
- , ("subparagraph*", section ("",["unnumbered"],[]) 5)
- -- beamer slides
- , ("frametitle", section nullAttr 3)
- , ("framesubtitle", section nullAttr 4)
- -- letters
- , ("opening", (para . trimInlines) <$> (skipopts *> tok))
- , ("closing", skipopts *> closing)
- --
- , ("hrule", pure horizontalRule)
- , ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> looseItem)
- , ("documentclass", skipopts *> braced *> preamble)
- , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> setCaption)
- , ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- , ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs))
- -- includes
- , ("lstinputlisting", inputListing)
- , ("graphicspath", graphicsPath)
- -- hyperlink
- , ("hypertarget", braced >> grouped block)
- -- LaTeX colors
- , ("textcolor", coloredBlock "color")
- , ("colorbox", coloredBlock "background-color")
- ] ++ map ignoreBlocks
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks
- [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
- -- newcommand, etc. should be parsed by macro, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
- , "bibliographystyle"
- , "maketitle", "makeindex", "makeglossary"
- , "addcontentsline", "addtocontents", "addtocounter"
- -- \ignore{} is used conventionally in literate haskell for definitions
- -- that are to be processed by the compiler but not printed.
- , "ignore"
- , "hyperdef"
- , "markboth", "markright", "markleft"
- , "hspace", "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-coloredBlock :: PandocMonad m => String -> LP m Blocks
-coloredBlock stylename = do
- skipopts
- color <- braced
- let constructor = divWith ("",[],[("style",stylename ++ ": " ++ color)])
- inlineContents <|> constructor <$> blockContents
- where inlineContents = do
- ils <- grouped inline
- rest <- inlines
- return (para (ils <> rest))
- blockContents = grouped block
-
-graphicsPath :: PandocMonad m => LP m Blocks
-graphicsPath = do
- ps <- bgroup *> (manyTill braced egroup)
- getResourcePath >>= setResourcePath . (++ ps)
- return mempty
-
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
-addMeta field val = updateState $ \st ->
- st{ stateMeta = addMetaField field val $ stateMeta st }
-
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-
-setCaption :: PandocMonad m => LP m Blocks
-setCaption = do
- ils <- tok
- mblabel <- option Nothing $
- try $ spaces' >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("data-label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ stateCaption = Just ils' }
- return mempty
-
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ stateCaption = Nothing }
-
-authors :: PandocMonad m => LP m ()
-authors = try $ do
- bgroup
- let oneAuthor = mconcat <$>
- many1 (notFollowedBy' (controlSeq "and") >>
- (inline <|> mempty <$ blockCommand))
- -- skip e.g. \vspace{10pt}
- auths <- sepBy oneAuthor (controlSeq "and")
- egroup
- addMeta "author" (map trimInlines auths)
-
-section :: PandocMonad m => Attr -> Int -> LP m Blocks
-section (ident, classes, kvs) lvl = do
- skipopts
- contents <- grouped inline
- lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
- attr' <- registerHeader (lab, classes, kvs) contents
- return $ headerWith attr' lvl contents
-
-inlineCommand :: PandocMonad m => LP m Inlines
-inlineCommand = try $ do
- (name, raw') <- withRaw anyControlSeq
- guard $ name /= "begin" && name /= "end"
- star <- option "" (string "*")
- let name' = name ++ star
- let raw = do
- guard $ not (isBlockCommand name)
- rawargs <- withRaw
- (skipangles *> skipopts *> option "" dimenarg *> many braced)
- let rawcommand = raw' ++ star ++ snd rawargs
- transformed <- applyMacros' rawcommand
- exts <- getOption readerExtensions
- if transformed /= rawcommand
- then parseFromString' inlines transformed
- else if extensionEnabled Ext_raw_tex exts
- then return $ rawInline "latex" rawcommand
- else ignore rawcommand
- (lookupListDefault raw [name',name] inlineCommands <*
- optional (try (string "{}")))
-
-rawInlineOr :: PandocMonad m => String -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
- parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
- if parseRaw
- then rawInline "latex" <$> getRawCommand name'
- else fallback
-
-isBlockCommand :: String -> Bool
-isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))
-
-
-inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
-inlineEnvironments = M.fromList
- [ ("displaymath", mathEnvWith id Nothing "displaymath")
- , ("math", math <$> mathEnv "math")
- , ("equation", mathEnvWith id Nothing "equation")
- , ("equation*", mathEnvWith id Nothing "equation*")
- , ("gather", mathEnvWith id (Just "gathered") "gather")
- , ("gather*", mathEnvWith id (Just "gathered") "gather*")
- , ("multline", mathEnvWith id (Just "gathered") "multline")
- , ("multline*", mathEnvWith id (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith id (Just "aligned") "align")
- , ("align*", mathEnvWith id (Just "aligned") "align*")
- , ("alignat", mathEnvWith id (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
- ]
-
-inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
-inlineCommands = M.fromList $
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
- , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
- , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
- , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
- , ("texttt", ttfamily)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("ldots", lit "…")
- , ("vdots", lit "\8942")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("label", rawInlineOr "label" (inBrackets <$> tok))
- , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
- , ("textgreek", tok)
- , ("sep", lit ",")
- , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
- , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
- , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", try $ tok >>= accent hungarumlaut)
- , ("`", option (str "`") $ try $ tok >>= accent grave)
- , ("'", option (str "'") $ try $ tok >>= accent acute)
- , ("^", option (str "^") $ try $ tok >>= accent circ)
- , ("~", option (str "~") $ try $ tok >>= accent tilde)
- , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
- , (".", option (str ".") $ try $ tok >>= accent dot)
- , ("=", option (str "=") $ try $ tok >>= accent macron)
- , ("c", option (str "c") $ try $ tok >>= accent cedilla)
- , ("v", option (str "v") $ try $ tok >>= accent hacek)
- , ("u", option (str "u") $ try $ tok >>= accent breve)
- , ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
- , (",", lit "\8198")
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", note <$> grouped block)
- , ("footnote", note <$> grouped block)
- , ("verb", doverb)
- , ("lstinline", dolstinline)
- , ("Verb", doverb)
- , ("url", (unescapeURL <$> braced) >>= \url ->
- pure (link url "" (str url)))
- , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
- tok >>= \lab ->
- pure (link url "" lab))
- , ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . removeDoubleQuotes <$> braced
- mkImage options src)
- , ("enquote", enquote)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- , ("hypertarget", braced >> tok)
- -- siuntix
- , ("SI", dosiunitx)
- -- hyphenat
- , ("bshyp", lit "\\\173")
- , ("fshyp", lit "/\173")
- , ("dothyp", lit ".\173")
- , ("colonhyp", lit ":\173")
- , ("hyp", lit "-")
- , ("nohyphens", tok)
- , ("textnhtt", ttfamily)
- , ("nhttfamily", ttfamily)
- -- LaTeX colors
- , ("textcolor", coloredInline "color")
- , ("colorbox", coloredInline "background-color")
- -- fontawesome
- , ("faCheck", lit "\10003")
- , ("faClose", lit "\10007")
- ] ++ map ignoreInlines
- -- these commands will be ignored unless --parse-raw is specified,
- -- in which case they will appear as raw latex blocks:
- [ "index"
- , "hspace"
- , "vspace"
- , "newpage"
- , "clearpage"
- , "pagebreak"
- ]
-
-coloredInline :: PandocMonad m => String -> LP m Inlines
-coloredInline stylename = do
- skipopts
- color <- braced
- spanWith ("",[],[("style",stylename ++ ": " ++ color)]) <$> tok
-
-ttfamily :: PandocMonad m => LP m Inlines
-ttfamily = (code . stringify . toList) <$> tok
+doLHSverb :: PandocMonad m => LP m Inlines
+doLHSverb =
+ (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
- let replaceTextwidth (k,v) = case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
- _ -> (k, v)
- let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let replaceTextwidth (k,v) =
+ case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth
+ $ filter (\(k,_) -> k `elem` ["width", "height"]) options
let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
@@ -737,56 +607,131 @@ mkImage options src = do
return $ imageWith attr (addExtension src defaultExt) "" alt
_ -> return $ imageWith attr src "" alt
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
+-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
+dosiunitx :: PandocMonad m => LP m Inlines
+dosiunitx = do
+ skipopts
+ value <- tok
+ valueprefix <- option "" $ bracketed tok
+ unit <- tok
+ let emptyOr160 "" = ""
+ emptyOr160 _ = "\160"
+ return . mconcat $ [valueprefix,
+ emptyOr160 valueprefix,
+ value,
+ emptyOr160 unit,
+ unit]
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
+lit :: String -> LP m Inlines
+lit = pure . str
+
+removeDoubleQuotes :: Text -> Text
+removeDoubleQuotes t =
+ maybe t id $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+
+doubleQuote :: PandocMonad m => LP m Inlines
+doubleQuote = do
+ quoted' doubleQuoted (try $ count 2 $ symbol '`')
+ (void $ try $ count 2 $ symbol '\'')
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '“') (void $ symbol '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ sequence [symbol '"', symbol '`'])
+ (void $ try $ sequence [symbol '"', symbol '\''])
+ <|> quoted' doubleQuoted ((:[]) <$> symbol '"')
+ (void $ symbol '"')
+
+singleQuote :: PandocMonad m => LP m Inlines
+singleQuote = do
+ quoted' singleQuoted ((:[]) <$> symbol '`')
+ (try $ symbol '\'' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ <|> quoted' singleQuoted ((:[]) <$> symbol '‘')
+ (try $ symbol '’' >>
+ notFollowedBy (satisfyTok startsWithLetter))
+ where startsWithLetter (Tok _ Word t) =
+ case T.uncons t of
+ Just (c, _) | isLetter c -> True
+ _ -> False
+ startsWithLetter _ = False
+
+quoted' :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> LP m [Tok]
+ -> LP m ()
+ -> LP m Inlines
+quoted' f starter ender = do
+ startchs <- (T.unpack . untokenize) <$> starter
+ smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
+ if smart
+ then do
+ ils <- many (notFollowedBy ender >> inline)
+ (ender >> return (f (mconcat ils))) <|>
+ (<> mconcat ils) <$>
+ lit (case startchs of
+ "``" -> "“"
+ "`" -> "‘"
+ cs -> cs)
+ else lit startchs
enquote :: PandocMonad m => LP m Inlines
enquote = do
skipopts
- context <- stateQuoteContext <$> getState
- if context == InDoubleQuote
+ quoteContext <- sQuoteContext <$> getState
+ if quoteContext == InDoubleQuote
then singleQuoted <$> withQuoteContext InSingleQuote tok
else doubleQuoted <$> withQuoteContext InDoubleQuote tok
doverb :: PandocMonad m => LP m Inlines
doverb = do
- marker <- anyChar
- code <$> manyTill (satisfy (/='\n')) (char marker)
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ (code . T.unpack . untokenize) <$>
+ manyTill (verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok (lin, col) toktype txt) <- satisfyTok (not . isNewlineTok)
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (lin, col + i) Symbol (T.singleton stopchar)
+ : (totoks (lin, col + i + 1) (T.drop 1 t2)) ++ inp
+ return $ Tok (lin, col) toktype t1
dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
options <- option [] keyvals
let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
- marker <- char '{' <|> anyChar
- codeWith ("",classes,[]) <$> manyTill (satisfy (/='\n')) (char '}' <|> char marker)
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ (codeWith ("",classes,[]) . T.unpack . untokenize) <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
-doLHSverb :: PandocMonad m => LP m Inlines
-doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
-
--- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
-dosiunitx :: PandocMonad m => LP m Inlines
-dosiunitx = do
- skipopts
- value <- tok
- valueprefix <- option "" $ char '[' >> (mconcat <$> manyTill tok (char ']'))
- unit <- tok
- let emptyOr160 "" = ""
- emptyOr160 _ = "\160"
- return . mconcat $ [valueprefix,
- emptyOr160 valueprefix,
- value,
- emptyOr160 unit,
- unit]
+keyval :: PandocMonad m => LP m (String, String)
+keyval = try $ do
+ Tok _ Word key <- satisfyTok isWordTok
+ let isSpecSym (Tok _ Symbol t) = t `elem` [".",":","-","|","\\"]
+ isSpecSym _ = False
+ val <- option [] $ do
+ symbol '='
+ braced <|> (many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
+ <|> anyControlSeq))
+ optional sp
+ optional (symbol ',')
+ optional sp
+ return (T.unpack key, T.unpack . untokenize $ val)
-lit :: String -> LP m Inlines
-lit = pure . str
+keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
@@ -994,18 +939,149 @@ breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c = [c]
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+mathDisplay :: String -> Inlines
+mathDisplay = displayMath . trim
+
+mathInline :: String -> Inlines
+mathInline = math . trim
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ contents <- trim . toksToString <$>
+ many (notFollowedBy (symbol '$') >> anyTok)
+ if display
+ then do
+ mathDisplay contents <$ try (symbol '$' >> symbol '$')
+ <|> (guard (null contents) >> return (mathInline ""))
+ else mathInline contents <$ (symbol '$')
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: PandocMonad m => LP m [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> (manyTill citationLabel egroup)
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+citationLabel :: PandocMonad m => LP m String
+citationLabel = do
+ optional sp
+ toksToString <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* optional sp
+ <* optional (symbol ',')
+ <* optional sp)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-[]" :: [Char]
+
+cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+
+citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ (toksToString raw))
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
+complexNatbibCitation mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` (symbol ';')
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
+inlineCommand' :: PandocMonad m => LP m Inlines
+inlineCommand' = try $ do
+ Tok _ (CtrlSeq name) cmd <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name] -- check non-starred as fallback
+ let raw = do
+ guard $ isInlineCommand name || not (isBlockCommand name)
+ rawcommand <- getRawCommand (cmd <> star)
+ (guardEnabled Ext_raw_tex >> return (rawInline "latex" rawcommand))
+ <|> ignore rawcommand
+ lookupListDefault raw names inlineCommands
+
tok :: PandocMonad m => LP m Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
+tok = grouped inline <|> inlineCommand' <|> singleChar
+ where singleChar = try $ do
+ Tok (lin,col) toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ (Tok (lin, col + 1) toktype t2) : inp
+ return $ str (T.unpack t1)
+ else return $ str (T.unpack t)
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline
-rawopt :: PandocMonad m => LP m String
+rawopt :: PandocMonad m => LP m Text
rawopt = do
- contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
- try (string "\\[") <|> rawopt)
+ symbol '['
+ inner <- untokenize <$> manyTill anyTok (symbol ']')
optional sp
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> inner <> "]"
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt
@@ -1013,58 +1089,719 @@ skipopts = skipMany rawopt
-- opts in angle brackets are used in beamer
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
- char '<'
- skipMany (noneOf ">")
- char '>'
- return ()
+ symbol '<'
+ () <$ manyTill anyTok (symbol '>')
skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle
-inlineText :: PandocMonad m => LP m Inlines
-inlineText = str <$> many1 inlineChar
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (0,0) Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
+ "\\end{" ++ T.unpack y ++ "}"
+
+mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ T.unpack $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ ]
+
+inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineCommands = M.fromList $
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+ , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+ , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+ , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+ , ("texttt", ttfamily)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", rawInlineOr "label" (inBrackets <$> tok))
+ , ("ref", rawInlineOr "ref" (inBrackets <$> tok))
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", rawInlineOr "cref" (inBrackets <$> tok)) -- from cleveref.sty
+ , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", try $ tok >>= accent hungarumlaut)
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent circ)
+ , ("~", option (str "~") $ try $ tok >>= accent tilde)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , (".", option (str ".") $ try $ tok >>= accent dot)
+ , ("=", option (str "=") $ try $ tok >>= accent macron)
+ , ("c", option (str "c") $ try $ tok >>= accent cedilla)
+ , ("v", option (str "v") $ try $ tok >>= accent hacek)
+ , ("u", option (str "u") $ try $ tok >>= accent breve)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional (bracketed inline)
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", note <$> grouped block)
+ , ("footnote", note <$> grouped block)
+ , ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("Verb", doverb)
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ pure (link url "" (str url)))
+ , ("href", (unescapeURL . toksToString <$>
+ braced <* optional sp) >>= \url ->
+ tok >>= \lab -> pure (link url "" lab))
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL . T.unpack .
+ removeDoubleQuotes . untokenize <$> braced
+ mkImage options src)
+ , ("enquote", enquote)
+ , ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ , ("hypertarget", braced >> tok)
+ -- siuntix
+ , ("SI", dosiunitx)
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ , ("nohyphens", tok)
+ , ("textnhtt", ttfamily)
+ , ("nhttfamily", ttfamily)
+ -- LaTeX colors
+ , ("textcolor", coloredInline "color")
+ , ("colorbox", coloredInline "background-color")
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ ]
+
+coloredInline :: PandocMonad m => String -> LP m Inlines
+coloredInline stylename = do
+ skipopts
+ color <- braced
+ spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok
+
+ttfamily :: PandocMonad m => LP m Inlines
+ttfamily = (code . stringify . toList) <$> tok
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name'
+ else fallback
+
+getRawCommand :: PandocMonad m => Text -> LP m String
+getRawCommand txt = do
+ (_, rawargs) <- withRaw
+ ((if txt == "\\write"
+ then () <$ satisfyTok isWordTok -- digits
+ else return ()) *>
+ skipangles *>
+ skipopts *>
+ option "" (try (optional sp *> dimenarg)) *>
+ many braced)
+ return $ T.unpack (txt <> untokenize rawargs)
+
+isBlockCommand :: Text -> Bool
+isBlockCommand s =
+ s `M.member` (blockCommands :: M.Map Text (LP PandocPure Blocks))
+ || s `Set.member` treatAsBlock
+
+treatAsBlock :: Set.Set Text
+treatAsBlock = Set.fromList
+ [ "newcommand", "renewcommand"
+ , "newenvironment", "renewenvironment"
+ , "providecommand", "provideenvironment"
+ -- newcommand, etc. should be parsed by macroDef, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "markboth", "markright", "markleft"
+ , "hspace", "vspace"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+isInlineCommand :: Text -> Bool
+isInlineCommand s =
+ s `M.member` (inlineCommands :: M.Map Text (LP PandocPure Inlines))
+ || s `Set.member` treatAsInline
+
+treatAsInline :: Set.Set Text
+treatAsInline = Set.fromList
+ [ "index"
+ , "hspace"
+ , "vspace"
+ , "noindent"
+ , "newpage"
+ , "clearpage"
+ , "pagebreak"
+ ]
+
+lookupListDefault :: (Show k, Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where lookupList l m = msum $ map (`M.lookup` m) l
+
+inline :: PandocMonad m => LP m Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ whitespace)
+ <|> (softbreak <$ endline)
+ <|> word
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ <|> (symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-')))
+ <|> doubleQuote
+ <|> singleQuote
+ <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
+ <|> (str "”" <$ symbol '”')
+ <|> (str "’" <$ symbol '\'')
+ <|> (str "’" <$ symbol '’')
+ <|> (str "\160" <$ symbol '~')
+ <|> dollarsMath
+ <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
+ <|> (str . (:[]) <$> primEscape)
+ <|> regularSymbol
+ <|> (do res <- symbolIn "#^'`\"[]"
+ pos <- getPosition
+ let s = T.unpack (untoken res)
+ report $ ParsingUnescaped s pos
+ return $ str s)
+
+inlines :: PandocMonad m => LP m Inlines
+inlines = mconcat <$> many inline
+
+-- block elements:
+
+begin_ :: PandocMonad m => Text -> LP m ()
+begin_ t = (try $ do
+ controlSeq "begin"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard (t == txt)) <?> ("\\begin{" ++ T.unpack t ++ "}")
+
+end_ :: PandocMonad m => Text -> LP m ()
+end_ t = (try $ do
+ controlSeq "end"
+ spaces
+ symbol '{'
+ spaces
+ Tok _ Word txt <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ guard $ t == txt) <?> ("\\end{" ++ T.unpack t ++ "}")
+
+preamble :: PandocMonad m => LP m Blocks
+preamble = mempty <$ many preambleBlock
+ where preambleBlock = spaces1
+ <|> void include
+ <|> void macroDef
+ <|> void blockCommand
+ <|> void braced
+ <|> (notFollowedBy (begin_ "document") >> void anyTok)
-inlineChar :: PandocMonad m => LP m Char
-inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"
+paragraph :: PandocMonad m => LP m Blocks
+paragraph = do
+ x <- trimInlines . mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para x
+
+include :: PandocMonad m => LP m Blocks
+include = do
+ (Tok _ (CtrlSeq name) _) <-
+ controlSeq "include" <|> controlSeq "input" <|>
+ controlSeq "subfile" <|> controlSeq "usepackage"
+ skipMany $ bracketed inline -- skip options
+ fs <- (map trim . splitBy (==',') . T.unpack . untokenize) <$> braced
+ let fs' = if name == "usepackage"
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
+ dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mconcat <$> mapM (insertIncludedFile blocks (tokenize . T.pack) dirs) fs'
+
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
+
+authors :: PandocMonad m => LP m ()
+authors = try $ do
+ bgroup
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >>
+ (inline <|> mempty <$ blockCommand))
+ -- skip e.g. \vspace{10pt}
+ auths <- sepBy oneAuthor (controlSeq "and")
+ egroup
+ addMeta "author" (map trimInlines auths)
+
+macroDef :: PandocMonad m => LP m Blocks
+macroDef = do
+ guardEnabled Ext_latex_macros
+ mempty <$ ((commandDef <|> environmentDef) <* doMacros 0)
+ where commandDef = do
+ (name, macro') <- newcommand
+ updateState $ \s -> s{ sMacros = M.insert name macro' (sMacros s) }
+ environmentDef = do
+ (name, macro1, macro2) <- newenvironment
+ updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand"
+ optional $ symbol '*'
+ Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents <- braced
+ when (mtype == "newcommand") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ optional $ symbol '*'
+ symbol '{'
+ spaces
+ Tok _ Word name <- satisfyTok isWordTok
+ spaces
+ symbol '}'
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ startcontents <- braced
+ spaces
+ endcontents <- braced
+ when (mtype == "newenvironment") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
+ Nothing -> return ()
+ return (name, Macro numargs optarg startcontents,
+ Macro 0 Nothing endcontents)
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ manyTill anyTok (symbol ']')
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead (T.unpack ds) of
+ Just i -> return i
+ _ -> return 0
+
+setCaption :: PandocMonad m => LP m Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ sCaption = Just ils' }
+ return mempty
+
+looseItem :: PandocMonad m => LP m Blocks
+looseItem = do
+ inListItem <- sInListItem <$> getState
+ guard $ not inListItem
+ skipopts
+ return mempty
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
+ skipopts
+ contents <- grouped inline
+ lab <- option ident $
+ try (spaces >> controlSeq "label"
+ >> spaces >> toksToString <$> braced)
+ attr' <- registerHeader (lab, classes, kvs) contents
+ return $ headerWith attr' lvl contents
+
+blockCommand :: PandocMonad m => LP m Blocks
+blockCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name /= "begin" && name /= "end"
+ star <- option "" ("*" <$ symbol '*' <* optional sp)
+ let name' = name <> star
+ let names = ordNub [name', name]
+ let raw = do
+ guard $ isBlockCommand name || not (isInlineCommand name)
+ rawBlock "latex" <$> getRawCommand (txt <> star)
+ lookupListDefault raw names blockCommands
+
+closing :: PandocMonad m => LP m Blocks
+closing = do
+ contents <- tok
+ st <- getState
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (sMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
+ return $ para (trimInlines contents) <> sigs
+
+blockCommands :: PandocMonad m => M.Map Text (LP m Blocks)
+blockCommands = M.fromList $
+ [ ("par", mempty <$ skipopts)
+ , ("parbox", braced >> grouped blocks)
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
+ -- Koma-script metadata commands
+ , ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
+ -- sectioning
+ , ("part", section nullAttr (-1))
+ , ("part*", section nullAttr (-1))
+ , ("chapter", section nullAttr 0)
+ , ("chapter*", section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
+ -- beamer slides
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ --
+ , ("hrule", pure horizontalRule)
+ , ("strut", pure mempty)
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("item", looseItem)
+ , ("documentclass", skipopts *> braced *> preamble)
+ , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
+ , ("caption", skipopts *> setCaption)
+ , ("bibliography", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ , ("addbibresource", mempty <$ (skipopts *> braced >>=
+ addMeta "bibliography" . splitBibs . toksToString))
+ -- includes
+ , ("lstinputlisting", inputListing)
+ , ("graphicspath", graphicsPath)
+ -- hyperlink
+ , ("hypertarget", try $ braced >> grouped block)
+ -- LaTeX colors
+ , ("textcolor", coloredBlock "color")
+ , ("colorbox", coloredBlock "background-color")
+ ]
+
+
+environments :: PandocMonad m => M.Map Text (LP m Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks)
+ , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
+ , ("letter", env "letter" letterContents)
+ , ("minipage", env "minipage" $
+ skipopts *> spaces *> optional braced *> spaces *> blocks)
+ , ("figure", env "figure" $ skipopts *> figure)
+ , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
+ , ("center", env "center" blocks)
+ , ("longtable", env "longtable" $
+ resetCaption *> simpTable "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ resetCaption *> skipopts *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular" $ simpTable "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable "tabular" False)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", orderedList')
+ , ("alltt", alltt <$> env "alltt" blocks)
+ , ("code", guardEnabled Ext_literate_haskell *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("comment", mempty <$ verbEnv "comment")
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
+ , ("Verbatim", fancyverbEnv "Verbatim")
+ , ("BVerbatim", fancyverbEnv "BVerbatim")
+ , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
+ codeBlockWith attr <$> verbEnv "lstlisting")
+ , ("minted", minted)
+ , ("obeylines", obeylines)
+ , ("displaymath", mathEnvWith para Nothing "displaymath")
+ , ("equation", mathEnvWith para Nothing "equation")
+ , ("equation*", mathEnvWith para Nothing "equation*")
+ , ("gather", mathEnvWith para (Just "gathered") "gather")
+ , ("gather*", mathEnvWith para (Just "gathered") "gather*")
+ , ("multline", mathEnvWith para (Just "gathered") "multline")
+ , ("multline*", mathEnvWith para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith para (Just "aligned") "align")
+ , ("align*", mathEnvWith para (Just "aligned") "align*")
+ , ("alignat", mathEnvWith para (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
+ , ("tikzpicture", rawVerbEnv "tikzpicture")
+ ]
environment :: PandocMonad m => LP m Blocks
environment = do
controlSeq "begin"
- name <- braced
+ name <- untokenize <$> braced
M.findWithDefault mzero name environments
<|> rawEnv name
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
- controlSeq "begin"
- name <- braced
- M.findWithDefault mzero name inlineEnvironments
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
-rawEnv :: PandocMonad m => String -> LP m Blocks
+rawEnv :: PandocMonad m => Text -> LP m Blocks
rawEnv name = do
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
rawOptions <- mconcat <$> many rawopt
- let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
+ let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
pos1 <- getPosition
(bs, raw) <- withRaw $ env name blocks
- raw' <- applyMacros' $ beginCommand ++ raw
- if raw' /= beginCommand ++ raw
- then parseFromString' blocks raw'
- else if parseRaw
- then return $ rawBlock "latex" $ beginCommand ++ raw'
- else do
- unless parseRaw $ do
- report $ SkippedContent beginCommand pos1
- pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
- return bs
-
-rawVerbEnv :: PandocMonad m => String -> LP m Blocks
+ if parseRaw
+ then return $ rawBlock "latex"
+ $ T.unpack $ beginCommand <> untokenize raw
+ else do
+ unless parseRaw $ do
+ report $ SkippedContent (T.unpack beginCommand) pos1
+ pos2 <- getPosition
+ report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ return bs
+
+rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{tikzpicture}" ++ raw
+ let raw' = "\\begin{tikzpicture}" ++ toksToString raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -1073,36 +1810,118 @@ rawVerbEnv name = do
report $ SkippedContent raw' pos
return mempty
-----
+verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv name = withVerbatimMode $ do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ toksToString res
-maybeAddExtension :: String -> FilePath -> FilePath
-maybeAddExtension ext fp =
- if null (takeExtension fp)
- then addExtension fp ext
- else fp
+fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
+fancyverbEnv name = do
+ options <- option [] keyvals
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv name
-include :: PandocMonad m => LP m Blocks
-include = do
- fs' <- try $ do
- char '\\'
- name <- try (string "include")
- <|> try (string "input")
- <|> try (string "subfile")
- <|> string "usepackage"
- -- skip options
- skipMany $ try $ char '[' *> manyTill anyChar (char ']')
- fs <- (map trim . splitBy (==',')) <$> braced
- return $ if name == "usepackage"
- then map (maybeAddExtension ".sty") fs
- else map (maybeAddExtension ".tex") fs
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mconcat <$> mapM (insertIncludedFile blocks dirs) fs'
+obeylines :: PandocMonad m => LP m Blocks
+obeylines = do
+ para . fromList . removeLeadingTrailingBreaks .
+ walk softBreakToHard . toList <$> env "obeylines" inlines
+ where softBreakToHard SoftBreak = LineBreak
+ softBreakToHard x = x
+ removeLeadingTrailingBreaks = reverse . dropWhile isLineBreak .
+ reverse . dropWhile isLineBreak
+ isLineBreak LineBreak = True
+ isLineBreak _ = False
+
+minted :: PandocMonad m => LP m Blocks
+minted = do
+ options <- option [] keyvals
+ lang <- toksToString <$> braced
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ let classes = [ lang | not (null lang) ] ++
+ [ "numberLines" |
+ lookup "linenos" options == Just "true" ]
+ let attr = ("",classes,kvs)
+ codeBlockWith attr <$> verbEnv "minted"
+
+letterContents :: PandocMonad m => LP m Blocks
+letterContents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case lookupMeta "address" (sMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
+ return $ addr <> bs -- sig added by \closing
+
+figure :: PandocMonad m => LP m Blocks
+figure = try $ do
+ resetCaption
+ blocks >>= addImageCaption
+
+addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
+addImageCaption = walkM go
+ where go (Image attr alt (src,tit))
+ | not ("fig:" `isPrefixOf` tit) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
+ Nothing -> Image attr alt (src,tit)
+ go x = return x
+
+coloredBlock :: PandocMonad m => String -> LP m Blocks
+coloredBlock stylename = do
+ skipopts
+ color <- braced
+ let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)])
+ inlineContents <|> constructor <$> blockContents
+ where inlineContents = do
+ ils <- grouped inline
+ rest <- inlines
+ return (para (ils <> rest))
+ blockContents = grouped block
+
+graphicsPath :: PandocMonad m => LP m Blocks
+graphicsPath = do
+ ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ getResourcePath >>= setResourcePath . (++ ps)
+ return mempty
+
+splitBibs :: String -> [Inlines]
+splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+
+alltt :: Blocks -> Blocks
+alltt = walk strToCode
+ where strToCode (Str s) = Code nullAttr s
+ strToCode Space = RawInline (Format "latex") "\\ "
+ strToCode SoftBreak = LineBreak
+ strToCode x = x
+
+parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions options =
+ let kvs = [ (if k == "firstnumber"
+ then "startFrom"
+ else k, v) | (k,v) <- options ]
+ classes = [ "numberLines" |
+ lookup "numbers" options == Just "left" ]
+ ++ maybeToList (lookup "language" options
+ >>= fromListingsLanguage)
+ in (fromMaybe "" (lookup "label" options), classes, kvs)
inputListing :: PandocMonad m => LP m Blocks
inputListing = do
pos <- getPosition
options <- option [] keyvals
- f <- filter (/='"') <$> braced
+ f <- filter (/='"') . toksToString <$> braced
dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
mbCode <- readFileFromDirs dirs f
codeLines <- case mbCode of
@@ -1121,169 +1940,10 @@ inputListing = do
drop (firstline - 1) codeLines
return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents
-parseListingsOptions :: [(String, String)] -> Attr
-parseListingsOptions options =
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- ++ maybeToList (lookup "language" options
- >>= fromListingsLanguage)
- in (fromMaybe "" (lookup "label" options), classes, kvs)
-
-----
-
-keyval :: PandocMonad m => LP m (String, String)
-keyval = try $ do
- key <- many1 alphaNum
- val <- option "" $ char '=' >> braced <|> (many1 (alphaNum <|> oneOf ".:-|\\"))
- skipMany spaceChar
- optional (char ',')
- skipMany spaceChar
- return (key, val)
-
-
-keyvals :: PandocMonad m => LP m [(String, String)]
-keyvals = try $ char '[' *> manyTill keyval (char ']')
-
-alltt :: PandocMonad m => String -> LP m Blocks
-alltt t = walk strToCode <$> parseFromString' blocks
- (substitute " " "\\ " $ substitute "%" "\\%" $
- intercalate "\\\\\n" $ lines t)
- where strToCode (Str s) = Code nullAttr s
- strToCode x = x
-
-rawLaTeXBlock :: PandocMonad m => LP m String
-rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-
-rawLaTeXInline :: PandocMonad m => LP m Inline
-rawLaTeXInline = do
- raw <- (snd <$> withRaw inlineCommand)
- <|> (snd <$> withRaw inlineEnvironment)
- <|> (snd <$> withRaw blockCommand)
- RawInline "latex" <$> applyMacros' raw
-
-addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
-addImageCaption = walkM go
- where go (Image attr alt (src,tit))
- | not ("fig:" `isPrefixOf` tit) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
- Nothing -> Image attr alt (src,tit)
- go x = return x
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table c als ws hs rs) = do
- mbcapt <- stateCaption <$> getState
- return $ case mbcapt of
- Just ils -> Table (toList ils) als ws hs rs
- Nothing -> Table c als ws hs rs
- go x = return x
-
-environments :: PandocMonad m => M.Map String (LP m Blocks)
-environments = M.fromList
- [ ("document", env "document" blocks <* skipMany anyChar)
- , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
- , ("letter", env "letter" letterContents)
- , ("minipage", env "minipage" $
- skipopts *> spaces' *> optional braced *> spaces' *> blocks)
- , ("figure", env "figure" $ skipopts *> figure)
- , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
- , ("center", env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable "longtable" False >>= addTableCaption)
- , ("table", env "table" $
- resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular" $ simpTable "tabular*" True)
- , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
- , ("tabular", env "tabular" $ simpTable "tabular" False)
- , ("quote", blockQuote <$> env "quote" blocks)
- , ("quotation", blockQuote <$> env "quotation" blocks)
- , ("verse", blockQuote <$> env "verse" blocks)
- , ("itemize", bulletList <$> listenv "itemize" (many item))
- , ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", orderedList')
- , ("alltt", alltt =<< verbEnv "alltt")
- , ("code", guardEnabled Ext_literate_haskell *>
- (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
- verbEnv "code"))
- , ("comment", mempty <$ verbEnv "comment")
- , ("verbatim", codeBlock <$> verbEnv "verbatim")
- , ("Verbatim", fancyverbEnv "Verbatim")
- , ("BVerbatim", fancyverbEnv "BVerbatim")
- , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
- codeBlockWith attr <$> verbEnv "lstlisting")
- , ("minted", do options <- option [] keyvals
- lang <- grouped (many1 $ satisfy (/='}'))
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
- [ "numberLines" |
- lookup "linenos" options == Just "true" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv "minted")
- , ("obeylines", parseFromString
- (para . trimInlines . mconcat <$> many inline) =<<
- intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnvWith para Nothing "displaymath")
- , ("equation", mathEnvWith para Nothing "equation")
- , ("equation*", mathEnvWith para Nothing "equation*")
- , ("gather", mathEnvWith para (Just "gathered") "gather")
- , ("gather*", mathEnvWith para (Just "gathered") "gather*")
- , ("multline", mathEnvWith para (Just "gathered") "multline")
- , ("multline*", mathEnvWith para (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith para (Just "aligned") "align")
- , ("align*", mathEnvWith para (Just "aligned") "align*")
- , ("alignat", mathEnvWith para (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
- , ("tikzpicture", rawVerbEnv "tikzpicture")
- ]
-
-figure :: PandocMonad m => LP m Blocks
-figure = try $ do
- resetCaption
- blocks >>= addImageCaption
-
-letterContents :: PandocMonad m => LP m Blocks
-letterContents = do
- bs <- blocks
- st <- getState
- -- add signature (author) and address (title)
- let addr = case lookupMeta "address" (stateMeta st) of
- Just (MetaBlocks [Plain xs]) ->
- para $ trimInlines $ fromList xs
- _ -> mempty
- return $ addr <> bs -- sig added by \closing
-
-closing :: PandocMonad m => LP m Blocks
-closing = do
- contents <- tok
- st <- getState
- let extractInlines (MetaBlocks [Plain ys]) = ys
- extractInlines (MetaBlocks [Para ys ]) = ys
- extractInlines _ = []
- let sigs = case lookupMeta "author" (stateMeta st) of
- Just (MetaList xs) ->
- para $ trimInlines $ fromList $
- intercalate [LineBreak] $ map extractInlines xs
- _ -> mempty
- return $ para (trimInlines contents) <> sigs
+-- lists
item :: PandocMonad m => LP m Blocks
-item = blocks *> controlSeq "item" *> skipopts *> blocks
-
-looseItem :: PandocMonad m => LP m Blocks
-looseItem = do
- ctx <- stateParserContext `fmap` getState
- if ctx == ListItemState
- then mzero
- else return mempty
+item = void blocks *> controlSeq "item" *> skipopts *> blocks
descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
@@ -1294,302 +1954,210 @@ descItem = do
bs <- blocks
return (ils, [bs])
-env :: PandocMonad m => String -> LP m a -> LP m a
-env name p = p <*
- (try (controlSeq "end" *> braced >>= guard . (== name))
- <?> ("\\end{" ++ name ++ "}"))
-
-listenv :: PandocMonad m => String -> LP m a -> LP m a
+listenv :: PandocMonad m => Text -> LP m a -> LP m a
listenv name p = try $ do
- oldCtx <- stateParserContext `fmap` getState
- updateState $ \st -> st{ stateParserContext = ListItemState }
+ oldInListItem <- sInListItem `fmap` getState
+ updateState $ \st -> st{ sInListItem = True }
res <- env name p
- updateState $ \st -> st{ stateParserContext = oldCtx }
+ updateState $ \st -> st{ sInListItem = oldInListItem }
return res
-mathEnvWith :: PandocMonad m
- => (Inlines -> a) -> Maybe String -> String -> LP m a
-mathEnvWith f innerEnv name = f <$> mathDisplay (inner <$> mathEnv name)
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
- "\\end{" ++ y ++ "}"
-
-mathEnv :: PandocMonad m => String -> LP m String
-mathEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = skipMany comment *>
- (many1 (noneOf "\\%") <|> try (string "\\%")
- <|> try (string "\\\\") <|> count 1 anyChar)
- res <- concat <$> manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-verbEnv :: PandocMonad m => String -> LP m String
-verbEnv name = do
- skipopts
- optional blankline
- let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
- charMuncher = anyChar
- res <- manyTill charMuncher endEnv
- return $ stripTrailingNewlines res
-
-fancyverbEnv :: PandocMonad m => String -> LP m Blocks
-fancyverbEnv name = do
- options <- option [] keyvals
- let kvs = [ (if k == "firstnumber"
- then "startFrom"
- else k, v) | (k,v) <- options ]
- let classes = [ "numberLines" |
- lookup "numbers" options == Just "left" ]
- let attr = ("",classes,kvs)
- codeBlockWith attr <$> verbEnv name
-
orderedList' :: PandocMonad m => LP m Blocks
orderedList' = try $ do
- optional sp
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
- optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
+ let markerSpec = do
+ symbol '['
+ ts <- toksToString <$> manyTill anyTok (symbol ']')
+ case runParser anyOrderedListMarker def "option" ts of
+ Right r -> return r
+ Left _ -> do
+ pos <- getPosition
+ report $ SkippedContent ("[" ++ ts ++ "]") pos
+ return (1, DefaultStyle, DefaultDelim)
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
- start <- option 1 $ try $ do controlSeq "setcounter"
- grouped (string "enum" *> many1 (oneOf "iv"))
+ optional $ try $ controlSeq "setlength"
+ *> grouped (count 1 $ controlSeq "itemindent")
+ *> braced
+ spaces
+ start <- option 1 $ try $ do pos <- getPosition
+ controlSeq "setcounter"
+ ctr <- toksToString <$> braced
+ guard $ "enum" `isPrefixOf` ctr
+ guard $ all (`elem` ['i','v']) (drop 4 ctr)
optional sp
- num <- grouped (many1 digit)
- spaces
- return (read num + 1 :: Int)
+ num <- toksToString <$> braced
+ case safeRead num of
+ Just i -> return (i + 1 :: Int)
+ Nothing -> do
+ report $ SkippedContent
+ ("\\setcounter{" ++ ctr ++
+ "}{" ++ num ++ "}") pos
+ return 1
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
-paragraph :: PandocMonad m => LP m Blocks
-paragraph = do
- x <- trimInlines . mconcat <$> many1 inline
- if x == mempty
- then return mempty
- else return $ para x
-
-preamble :: PandocMonad m => LP m Blocks
-preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = void comment
- <|> void sp
- <|> void blanklines
- <|> void include
- <|> void macro
- <|> void blockCommand
- <|> void anyControlSeq
- <|> void braced
- <|> void anyChar
-
--------
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- keys <- try $ bgroup *> (manyTill citationLabel egroup)
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m String
-citationLabel = optional sp *>
- (many1 (satisfy isBibtexKeyChar)
- <* optional sp
- <* optional (char ',')
- <* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)
+-- tables
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then many1 simpleCiteArgs
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional $ bracketed inline
+ return ()
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") <* spaces
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
- let ils = (toList . trimInlines . mconcat) <$>
- many (notFollowedBy (oneOf "\\};") >> inline)
- let parseOne = try $ do
- skipSpaces
- pref <- ils
- cit' <- inline -- expect a citation
- let citlist = toList cit'
- cits' <- case citlist of
- [Cite cs _] -> return cs
- _ -> mzero
- suff <- ils
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff cits'
- (c:cits, raw) <- withRaw $ grouped parseOne
- return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ raw)
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
--- tables
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) -> do
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ _ -> return ()
-parseAligns :: PandocMonad m => LP m [(Alignment, Double, (String, String))]
+parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
parseAligns = try $ do
- bgroup
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
- maybeBar
- let cAlign = AlignCenter <$ char 'c'
- let lAlign = AlignLeft <$ char 'l'
- let rAlign = AlignRight <$ char 'r'
- let parAlign = AlignLeft <$ char 'p'
- -- algins from tabularx
- let xAlign = AlignLeft <$ char 'X'
- let mAlign = AlignLeft <$ char 'm'
- let bAlign = AlignLeft <$ char 'b'
- let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
- <|> xAlign <|> mAlign <|> bAlign
- let alignPrefix = char '>' >> braced
- let alignSuffix = char '<' >> braced
+ let maybeBar = skipMany $
+ sp <|> () <$ symbol '|' <|> () <$ (symbol '@' >> braced)
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
- char '{'
- ds <- many1 (oneOf "0123456789.")
+ symbol '{'
+ ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
spaces
- string "\\linewidth"
- char '}'
+ symbol '}'
case safeRead ds of
Just w -> return w
Nothing -> return 0.0
- let alignSpec = do
+ let alignSpec = try $ do
spaces
- pref <- option "" alignPrefix
+ pref <- option [] alignPrefix
spaces
al <- alignChar
- width <- colWidth <|> option 0.0 (do s <- braced
+ width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
pos <- getPosition
report $ SkippedContent s pos
return 0.0)
spaces
- suff <- option "" alignSuffix
+ suff <- option [] alignSuffix
return (al, width, (pref, suff))
- aligns' <- sepEndBy alignSpec maybeBar
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many (alignSpec <* maybeBar)
spaces
egroup
spaces
- return $ aligns'
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces'
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces'
- optional $ bracketed (many1 (satisfy (/=']')))
- return ()
-
-lbreak :: PandocMonad m => LP m ()
-lbreak = () <$ try (spaces' *>
- (controlSeq "\\" <|> controlSeq "tabularnewline") <*
- spaces')
-
-amp :: PandocMonad m => LP m ()
-amp = () <$ try (spaces' *> char '&' <* spaces')
+ return aligns'
parseTableRow :: PandocMonad m
- => String -- ^ table environment name
- -> [(String, String)] -- ^ pref/suffixes
+ => Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
-> LP m [Blocks]
-parseTableRow envname prefsufs = try $ do
+parseTableRow envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
let cols = length prefsufs
- let tableCellRaw = concat <$> many
- (do notFollowedBy amp
- notFollowedBy lbreak
- notFollowedBy $ () <$ try (string ("\\end{" ++ envname ++ "}"))
- many1 (noneOf "&%\n\r\\")
- <|> try (string "\\&")
- <|> count 1 anyChar)
- let plainify bs = case toList bs of
- [Para ils] -> plain (fromList ils)
- _ -> bs
- rawcells <- sepBy1 tableCellRaw amp
- guard $ length rawcells == cols
- let rawcells' = zipWith (\c (p, s) -> p ++ trim c ++ s) rawcells prefsufs
- let tableCell = plainify <$> blocks
- cells' <- mapM (parseFromString' tableCell) rawcells'
- let numcells = length cells'
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- many (notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ >> anyTok)
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos (sourceLine prefpos, sourceColumn prefpos)) pref
+ ++ contents ++
+ map (setpos (sourceLine suffpos, sourceColumn suffpos)) suff
+ rawcells <- sequence (map celltoks prefsufs)
+ oldInput <- getInput
+ cells <- sequence $ map (\ts -> setInput ts >> parseTableCell) rawcells
+ setInput oldInput
+ spaces
+ let numcells = length cells
guard $ numcells <= cols && numcells >= 1
- guard $ cells' /= [mempty]
+ guard $ cells /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
- let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces'
- return cells''
+ return $ cells ++ replicate (cols - numcells) mempty
-spaces' :: PandocMonad m => LP m ()
-spaces' = spaces *> skipMany (comment *> spaces)
+parseTableCell :: PandocMonad m => LP m Blocks
+parseTableCell = do
+ let plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+ updateState $ \st -> st{ sInTableCell = True }
+ cells <- plainify <$> blocks
+ updateState $ \st -> st{ sInTableCell = False }
+ return cells
-simpTable :: PandocMonad m => String -> Bool -> LP m Blocks
+simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
simpTable envname hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces' >> tok)
+ when hasWidthParameter $ () <$ (spaces >> tok)
skipopts
colspecs <- parseAligns
let (aligns, widths, prefsufs) = unzip3 colspecs
let cols = length colspecs
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
skipMany hline
- spaces'
+ spaces
header' <- option [] $ try (parseTableRow envname prefsufs <*
lbreak <* many1 hline)
- spaces'
+ spaces
rows <- sepEndBy (parseTableRow envname prefsufs)
(lbreak <* optional (skipMany hline))
- spaces'
+ spaces
optional $ controlSeq "caption" *> skipopts *> setCaption
optional lbreak
- spaces'
+ spaces
let header'' = if null header'
then replicate cols mempty
else header'
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table mempty (zip aligns widths) header'' rows
-removeDoubleQuotes :: String -> String
-removeDoubleQuotes ('"':xs) =
- case reverse xs of
- '"':ys -> reverse ys
- _ -> '"':xs
-removeDoubleQuotes xs = xs
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table c als ws hs rs) = do
+ mbcapt <- sCaption <$> getState
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
+ go x = return x
+
+
+block :: PandocMonad m => LP m Blocks
+block = (mempty <$ spaces1)
+ <|> environment
+ <|> include
+ <|> macroDef
+ <|> blockCommand
+ <|> paragraph
+ <|> grouped block
+
+blocks :: PandocMonad m => LP m Blocks
+blocks = mconcat <$> many block
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
new file mode 100644
index 000000000..6f84ae1f1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -0,0 +1,48 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Types
+ Copyright : Copyright (C) 2017 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Types for LaTeX tokens and macros.
+-}
+module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
+ , TokType(..)
+ , Macro(..)
+ , Line
+ , Column )
+where
+import Data.Text (Text)
+import Text.Parsec.Pos (Line, Column)
+
+data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
+ Esc1 | Esc2 | Arg Int
+ deriving (Eq, Ord, Show)
+
+data Tok = Tok (Line, Column) TokType Text
+ deriving (Eq, Ord, Show)
+
+data Macro = Macro Int (Maybe [Tok]) [Tok]
+ deriving Show
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e1c481311..ab6a32b78 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -52,15 +52,17 @@ import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..), report)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
-import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline, applyMacros,
+ macro)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
@@ -74,7 +76,7 @@ readMarkdown :: PandocMonad m
-> m Pandoc
readMarkdown opts s = do
parsed <- (readWithM parseMarkdown) def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -291,18 +293,22 @@ ignorable t = (T.pack "_") `T.isSuffixOf` t
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
-toMetaValue x = toMeta <$> parseFromString' parseBlocks (T.unpack x)
- where
- toMeta p = do
- p' <- p
- return $
- case B.toList p' of
- [Plain xs] -> MetaInlines xs
- [Para xs]
- | endsWithNewline x -> MetaBlocks [Para xs]
- | otherwise -> MetaInlines xs
- bs -> MetaBlocks bs
- endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+toMetaValue x =
+ parseFromString' parser' (T.unpack x)
+ where parser' = (asInlines <$> ((trimInlinesF . mconcat)
+ <$> (guard (not endsWithNewline)
+ *> manyTill inline eof)))
+ <|> (asBlocks <$> parseBlocks)
+ asBlocks p = do
+ p' <- p
+ return $ MetaBlocks (B.toList p')
+ asInlines p = do
+ p' <- p
+ return $ MetaInlines (B.toList p')
+ endsWithNewline = T.pack "\n" `T.isSuffixOf` x
+ -- Note: a standard quoted or unquoted YAML value will
+ -- not end in a newline, but a "block" set off with
+ -- `|` or `>` will.
yamlToMeta :: PandocMonad m
=> Yaml.Value -> MarkdownParser m (F MetaValue)
@@ -368,13 +374,14 @@ parseMarkdown = do
-- lookup to get sourcepos
case M.lookup n (stateNotes' st) of
Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
- Nothing -> error "The impossible happened.") notesDefined
+ Nothing -> throwError $
+ PandocShouldNeverHappenError "note not found")
+ notesDefined
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
meta <- stateMeta' st
return $ Pandoc meta bs) st
reportLogMessages
- (do guardEnabled Ext_east_asian_line_breaks
- return $ eastAsianLineBreakFilter doc) <|> return doc
+ return doc
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
@@ -488,7 +495,6 @@ parseBlocks = mconcat <$> manyTill block eof
block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
- pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
@@ -514,8 +520,7 @@ block = do
, para
, plain
] <?> "block"
- report $ ParsingTrace
- (take 60 $ show $ B.toList $ runF res defaultParserState) pos
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
return res
--
@@ -680,19 +685,36 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
+rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute = do
+ char '{'
+ skipMany spaceChar
+ char '='
+ format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ skipMany spaceChar
+ char '}'
+ return format
+
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
- attr <- option ([],[],[]) $
- try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
blankline
- contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
+ contents <- intercalate "\n" <$>
+ manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawBlock syn contents
+ Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
@@ -1013,7 +1035,8 @@ para = try $ do
result' <- result
case B.toList result' of
[Image attr alt (src,tit)]
- | Ext_implicit_figures `extensionEnabled` exts ->
+ | not (null alt) &&
+ Ext_implicit_figures `extensionEnabled` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
$ Image attr alt (src,'f':'i':'g':':':tit)
@@ -1083,10 +1106,11 @@ latexMacro = try $ do
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
- <|> (B.rawBlock "context" . concat <$>
+ result <- (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
+ <|> (B.rawBlock "latex" . concat <$>
+ rawLaTeXBlock `sepEndBy1` blankline)
+
spaces
return $ return result
@@ -1515,17 +1539,24 @@ code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ result <- (trim . concat) <$>
+ many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes
- >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
+ rawattr <-
+ (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ <|>
+ (Right <$> option ("",[],[])
+ (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+ return $ return $
+ case rawattr of
+ Left syn -> B.rawInline syn result
+ Right attr -> B.codeWith attr result
math :: PandocMonad m => MarkdownParser m (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
+math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
+ <|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
(guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
@@ -1849,9 +1880,8 @@ rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead (char '\\')
notFollowedBy' rawConTeXtEnvironment
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
- -- "tex" because it might be context or latex
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index a3ff60c14..a7f073d50 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -52,13 +52,14 @@ import qualified Data.Set as Set
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim)
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim,
+ crFilter)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
@@ -77,7 +78,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (unpack s ++ "\n")
+ (unpack (crFilter s) ++ "\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -205,7 +206,6 @@ parseMediaWiki = do
block :: PandocMonad m => MWParser m Blocks
block = do
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> table
<|> header
@@ -218,7 +218,7 @@ block = do
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
para :: PandocMonad m => MWParser m Blocks
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
new file mode 100644
index 000000000..1ae73c148
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -0,0 +1,607 @@
+{-
+ Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Muse
+ Copyright : Copyright (C) 2017 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of Muse text to 'Pandoc' document.
+-}
+{-
+TODO:
+- {{{ }}} syntax for <example>
+- Page breaks (five "*")
+- Headings with anchors (make it round trip with Muse writer)
+- <verse> and ">"
+- Definition lists
+- Org tables
+- table.el tables
+- Images with attributes (floating and width)
+- Anchors
+- Citations and <biblio>
+- <play> environment
+- <verbatim> tag
+-}
+module Text.Pandoc.Readers.Muse (readMuse) where
+
+import Control.Monad
+import Control.Monad.Except (throwError)
+import qualified Data.Map as M
+import Data.Text (Text, unpack)
+import Data.List (stripPrefix)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter)
+import Text.Pandoc.Parsing hiding (nested)
+import Text.Pandoc.Readers.HTML (htmlTag)
+import Text.Pandoc.XML (fromEntities)
+import System.FilePath (takeExtension)
+
+-- | Read Muse from an input string and return a Pandoc document.
+readMuse :: PandocMonad m
+ => ReaderOptions
+ -> Text
+ -> m Pandoc
+readMuse opts s = do
+ res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right d -> return d
+
+type MuseParser = ParserT String ParserState
+
+--
+-- main parser
+--
+
+parseMuse :: PandocMonad m => MuseParser m Pandoc
+parseMuse = do
+ many directive
+ blocks <- parseBlocks
+ st <- getState
+ let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+ meta <- stateMeta' st
+ return $ Pandoc meta bs) st
+ reportLogMessages
+ return doc
+
+parseBlocks :: PandocMonad m => MuseParser m (F Blocks)
+parseBlocks = do
+ res <- mconcat <$> many block
+ spaces
+ eof
+ return res
+
+--
+-- utility functions
+--
+
+nested :: PandocMonad m => MuseParser m a -> MuseParser m a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlElement tag = try $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = void $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: PandocMonad m
+ => String -> MuseParser m a -> MuseParser m (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a]
+parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p)
+
+--
+-- directive parsers
+--
+
+parseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseDirective = do
+ char '#'
+ key <- many letter
+ space
+ spaces
+ raw <- many $ noneOf "\n"
+ newline
+ value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw
+ return (key, value)
+
+directive :: PandocMonad m => MuseParser m ()
+directive = do
+ (key, value) <- parseDirective
+ updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st }
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => MuseParser m (F Blocks)
+block = do
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ trace (take 60 $ show $ B.toList $ runF res defaultParserState)
+ return res
+
+blockElements :: PandocMonad m => MuseParser m (F Blocks)
+blockElements = choice [ comment
+ , separator
+ , header
+ , exampleTag
+ , literal
+ , centerTag
+ , rightTag
+ , quoteTag
+ , bulletList
+ , orderedList
+ , table
+ , commentTag
+ , indentedBlock
+ , noteBlock
+ ]
+
+comment :: PandocMonad m => MuseParser m (F Blocks)
+comment = try $ do
+ char ';'
+ space
+ many $ noneOf "\n"
+ void newline <|> eof
+ return mempty
+
+separator :: PandocMonad m => MuseParser m (F Blocks)
+separator = try $ do
+ string "----"
+ many $ char '-'
+ many spaceChar
+ void newline <|> eof
+ return $ return B.horizontalRule
+
+header :: PandocMonad m => MuseParser m (F Blocks)
+header = try $ do
+ st <- stateParserContext <$> getState
+ q <- stateQuoteContext <$> getState
+ getPosition >>= \pos -> guard (st == NullState && q == NoQuote && sourceColumn pos == 1)
+ level <- liftM length $ many1 $ char '*'
+ guard $ level <= 5
+ skipSpaces
+ content <- trimInlinesF . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", [], []) (runF content defaultParserState)
+ return $ B.headerWith attr level <$> content
+
+exampleTag :: PandocMonad m => MuseParser m (F Blocks)
+exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example"
+
+literal :: PandocMonad m => MuseParser m (F Blocks)
+literal = liftM (return . rawBlock) $ htmlElement "literal"
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+blockTag :: PandocMonad m
+ => (Blocks -> Blocks)
+ -> String
+ -> MuseParser m (F Blocks)
+blockTag f s = do
+ res <- parseHtmlContent s block
+ return $ f <$> mconcat res
+
+-- <center> tag is ignored
+centerTag :: PandocMonad m => MuseParser m (F Blocks)
+centerTag = blockTag id "center"
+
+-- <right> tag is ignored
+rightTag :: PandocMonad m => MuseParser m (F Blocks)
+rightTag = blockTag id "right"
+
+quoteTag :: PandocMonad m => MuseParser m (F Blocks)
+quoteTag = blockTag B.blockQuote "quote"
+
+commentTag :: PandocMonad m => MuseParser m (F Blocks)
+commentTag = parseHtmlContent "comment" block >> return mempty
+
+-- Indented block is either center, right or quote
+indentedLine :: PandocMonad m => MuseParser m (Int, String)
+indentedLine = try $ do
+ indent <- length <$> many1 spaceChar
+ line <- anyLine
+ return (indent, line)
+
+rawIndentedBlock :: PandocMonad m => MuseParser m (Int, String)
+rawIndentedBlock = try $ do
+ lns <- many1 indentedLine
+ let indent = minimum $ map fst lns
+ return (indent, unlines $ map snd lns)
+
+indentedBlock :: PandocMonad m => MuseParser m (F Blocks)
+indentedBlock = try $ do
+ (indent, raw) <- rawIndentedBlock
+ contents <- withQuoteContext InDoubleQuote $ parseFromString parseBlocks raw
+ return $ (if indent >= 2 && indent < 6 then B.blockQuote else id) <$> contents
+
+para :: PandocMonad m => MuseParser m (F Blocks)
+para = liftM B.para . trimInlinesF . mconcat <$> many1Till inline endOfParaElement
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> void blockElements
+
+noteMarker :: PandocMonad m => MuseParser m String
+noteMarker = try $ do
+ char '['
+ many1Till digit $ char ']'
+
+noteBlock :: PandocMonad m => MuseParser m (F Blocks)
+noteBlock = try $ do
+ pos <- getPosition
+ ref <- noteMarker <* skipSpaces
+ content <- mconcat <$> blocksTillNote
+ oldnotes <- stateNotes' <$> getState
+ case M.lookup ref oldnotes of
+ Just _ -> logMessage $ DuplicateNoteReference ref pos
+ Nothing -> return ()
+ updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes }
+ return mempty
+ where
+ blocksTillNote =
+ many1Till block (eof <|> () <$ lookAhead noteMarker)
+
+--
+-- lists
+--
+
+listLine :: PandocMonad m => Int -> MuseParser m String
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ anyLineNewline
+
+withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a
+withListContext p = do
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state { stateParserContext = ListItemState }
+ parsed <- p
+ updateState (\st -> st {stateParserContext = oldContext})
+ return parsed
+
+listContinuation :: PandocMonad m => Int -> MuseParser m String
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 $ listLine markerLength
+ return $ blanks ++ concat result
+
+listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int
+listStart marker = try $ do
+ preWhitespace <- length <$> many spaceChar
+ st <- stateParserContext <$> getState
+ getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1)
+ markerLength <- marker
+ postWhitespace <- length <$> many1 spaceChar
+ return $ preWhitespace + markerLength + postWhitespace
+
+listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks)
+listItem start = try $ do
+ markerLength <- start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ restLines <- many $ listLine markerLength
+ let first = firstLine ++ blank ++ concat restLines
+ rest <- many $ listContinuation markerLength
+ parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n"
+
+bulletListItems :: PandocMonad m => MuseParser m (F [Blocks])
+bulletListItems = sequence <$> many1 (listItem bulletListStart)
+
+bulletListStart :: PandocMonad m => MuseParser m Int
+bulletListStart = listStart (char '-' >> return 1)
+
+bulletList :: PandocMonad m => MuseParser m (F Blocks)
+bulletList = do
+ listItems <- bulletListItems
+ return $ B.bulletList <$> listItems
+
+orderedListStart :: PandocMonad m
+ => ListNumberStyle
+ -> ListNumberDelim
+ -> MuseParser m Int
+orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim))
+
+orderedList :: PandocMonad m => MuseParser m (F Blocks)
+orderedList = try $ do
+ p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar)
+ guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
+ guard $ delim == Period
+ items <- sequence <$> many1 (listItem $ orderedListStart style delim)
+ return $ B.orderedListWith p <$> items
+
+--
+-- tables
+--
+
+data MuseTable = MuseTable
+ { museTableCaption :: Inlines
+ , museTableHeaders :: [[Blocks]]
+ , museTableRows :: [[Blocks]]
+ , museTableFooters :: [[Blocks]]
+ }
+
+data MuseTableElement = MuseHeaderRow (F [Blocks])
+ | MuseBodyRow (F [Blocks])
+ | MuseFooterRow (F [Blocks])
+ | MuseCaption (F Inlines)
+
+museToPandocTable :: MuseTable -> Blocks
+museToPandocTable (MuseTable caption headers body footers) =
+ B.table caption attrs headRow rows
+ where ncol = maximum (0 : map length (headers ++ body ++ footers))
+ attrs = replicate ncol (AlignDefault, 0.0)
+ headRow = if null headers then [] else head headers
+ rows = (if null headers then [] else tail headers) ++ body ++ footers
+
+museAppendElement :: MuseTable
+ -> MuseTableElement
+ -> F MuseTable
+museAppendElement tbl element =
+ case element of
+ MuseHeaderRow row -> do
+ row' <- row
+ return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] }
+ MuseBodyRow row -> do
+ row' <- row
+ return tbl{ museTableRows = museTableRows tbl ++ [row'] }
+ MuseFooterRow row-> do
+ row' <- row
+ return tbl{ museTableFooters = museTableFooters tbl ++ [row'] }
+ MuseCaption inlines -> do
+ inlines' <- inlines
+ return tbl{ museTableCaption = inlines' }
+
+tableCell :: PandocMonad m => MuseParser m (F Blocks)
+tableCell = try $ do
+ content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
+ return $ B.plain <$> content
+ where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof
+
+tableElements :: PandocMonad m => MuseParser m [MuseTableElement]
+tableElements = tableParseElement `sepEndBy1` (void newline <|> eof)
+
+elementsToTable :: [MuseTableElement] -> F MuseTable
+elementsToTable = foldM museAppendElement emptyTable
+ where emptyTable = MuseTable mempty mempty mempty mempty
+
+table :: PandocMonad m => MuseParser m (F Blocks)
+table = try $ do
+ rows <- tableElements
+ let tbl = elementsToTable rows
+ let pandocTbl = museToPandocTable <$> tbl :: F Blocks
+ return pandocTbl
+
+tableParseElement :: PandocMonad m => MuseParser m MuseTableElement
+tableParseElement = tableParseHeader
+ <|> tableParseBody
+ <|> tableParseFooter
+ <|> tableParseCaption
+
+tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow n = try $ do
+ fields <- tableCell `sepBy2` fieldSep
+ return $ sequence fields
+ where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
+ fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+
+tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
+tableParseHeader = MuseHeaderRow <$> tableParseRow 2
+
+tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
+tableParseBody = MuseBodyRow <$> tableParseRow 1
+
+tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
+tableParseFooter = MuseFooterRow <$> tableParseRow 3
+
+tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
+tableParseCaption = try $ do
+ many spaceChar
+ string "|+"
+ contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|")
+ string "+|"
+ return $ MuseCaption contents
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => MuseParser m (F Inlines)
+inline = choice [ br
+ , footnote
+ , strong
+ , strongTag
+ , emph
+ , emphTag
+ , superscriptTag
+ , subscriptTag
+ , strikeoutTag
+ , link
+ , code
+ , codeTag
+ , whitespace
+ , str
+ , symbol
+ ] <?> "inline"
+
+footnote :: PandocMonad m => MuseParser m (F Inlines)
+footnote = try $ do
+ ref <- noteMarker
+ return $ do
+ notes <- asksF stateNotes'
+ case M.lookup ref notes of
+ Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Just (_pos, contents) -> do
+ st <- askF
+ let contents' = runF contents st { stateNotes' = M.empty }
+ return $ B.note contents'
+
+whitespace :: PandocMonad m => MuseParser m (F Inlines)
+whitespace = liftM return (lb <|> regsp)
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: PandocMonad m => MuseParser m (F Inlines)
+br = try $ do
+ string "<br>"
+ return $ return B.linebreak
+
+linebreak :: PandocMonad m => MuseParser m (F Inlines)
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = do
+ eof
+ return $ return mempty
+ innerNewline = return $ return B.space
+
+emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
+emphasisBetween c = try $ enclosedInlines c c
+
+enclosedInlines :: (PandocMonad m, Show a, Show b)
+ => MuseParser m a
+ -> MuseParser m b
+ -> MuseParser m (F Inlines)
+enclosedInlines start end = try $
+ trimInlinesF . mconcat <$> enclosed start end inline
+
+verbatimBetween :: PandocMonad m
+ => Char
+ -> MuseParser m String
+verbatimBetween c = try $ do
+ char c
+ many1Till anyChar $ char c
+
+inlineTag :: PandocMonad m
+ => (Inlines -> Inlines)
+ -> String
+ -> MuseParser m (F Inlines)
+inlineTag f s = do
+ res <- parseHtmlContent s inline
+ return $ f <$> mconcat res
+
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = inlineTag B.strong "strong"
+
+strong :: PandocMonad m => MuseParser m (F Inlines)
+strong = fmap B.strong <$> emphasisBetween (string "**")
+
+emph :: PandocMonad m => MuseParser m (F Inlines)
+emph = fmap B.emph <$> emphasisBetween (char '*')
+
+emphTag :: PandocMonad m => MuseParser m (F Inlines)
+emphTag = inlineTag B.emph "em"
+
+superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+superscriptTag = inlineTag B.superscript "sup"
+
+subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+subscriptTag = inlineTag B.subscript "sub"
+
+strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
+strikeoutTag = inlineTag B.strikeout "del"
+
+code :: PandocMonad m => MuseParser m (F Inlines)
+code = try $ do
+ pos <- getPosition
+ sp <- if sourceColumn pos == 1
+ then pure mempty
+ else skipMany1 spaceChar >> pure B.space
+ cd <- verbatimBetween '='
+ notFollowedBy nonspaceChar
+ return $ return (sp B.<> B.code cd)
+
+codeTag :: PandocMonad m => MuseParser m (F Inlines)
+codeTag = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ return $ B.codeWith attrs $ fromEntities content
+
+str :: PandocMonad m => MuseParser m (F Inlines)
+str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => MuseParser m (F Inlines)
+symbol = liftM (return . B.str) $ count 1 nonspaceChar
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ case stripPrefix "URL:" url of
+ Nothing -> if isImageUrl url
+ then B.image url title <$> fromMaybe (return mempty) content
+ else B.link url title <$> fromMaybe (return $ B.str url) content
+ Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content
+ where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ isImageUrl = (`elem` imageExtensions) . takeExtension
+
+linkContent :: PandocMonad m => MuseParser m (F Inlines)
+linkContent = do
+ char '['
+ res <- many1Till anyChar $ char ']'
+ parseFromString (mconcat <$> many1 inline) res
+
+linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines))
+linkText = do
+ string "[["
+ url <- many1Till anyChar $ char ']'
+ content <- optionMaybe linkContent
+ char ']'
+ return (url, "", content)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 591d7590e..c25ace800 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
-import Control.Monad.State
+import Control.Monad.State.Strict
import Data.Char (toUpper)
import Data.Text (Text, unpack, pack)
import Data.Default
@@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.XML.Light
@@ -32,7 +33,8 @@ instance Default OPMLState where
readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML _ inp = do
(bs, st') <- flip runStateT def
- (mapM parseBlock $ normalizeTree $ parseXML (unpack inp))
+ (mapM parseBlock $ normalizeTree $
+ parseXML (unpack (crFilter inp)))
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
index 4d6a67b8e..8c47cdaf5 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs
@@ -124,8 +124,3 @@ instance ChoiceVector SuccessList where
spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing
where unTagRight (Right x) = (x:)
unTagRight _ = id
-
--- | Like 'catMaybes', but for 'Either'.
-collectRights :: [Either _l r] -> [r]
-collectRights = collectNonFailing . untag . spreadChoice . SuccessList
- where untag = fromLeft (error "Unexpected Left")
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 1c3e08a7f..428048427 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -71,6 +71,7 @@ import Control.Applicative hiding ( liftA, liftA2 )
import Control.Monad ( MonadPlus )
import Control.Arrow
+import Data.Either ( rights )
import qualified Data.Map as M
import Data.Default
import Data.Maybe
@@ -604,7 +605,7 @@ tryAll :: (NameSpaceID nsID)
-> XMLConverter nsID extraState b [a]
tryAll nsID name a = prepareIteration nsID name
>>> iterateS (switchingTheStack a)
- >>^ collectRights
+ >>^ rights
--------------------------------------------------------------------------------
-- Matching children
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5e0d67d10..eaccc251c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -36,6 +36,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
+import Text.Pandoc.Shared (crFilter)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
@@ -51,7 +52,7 @@ readOrg :: PandocMonad m
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 66273e05d..42fdfd4dd 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -826,9 +826,10 @@ maybeRight = either (const Nothing) Just
inlineLaTeXCommand :: PandocMonad m => OrgParser m String
inlineLaTeXCommand = try $ do
rest <- getInput
- parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
+ st <- getState
+ parsed <- (lift . lift) $ runParserT rawLaTeXInline st "source" rest
case parsed of
- Right (RawInline _ cs) -> do
+ Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
let cmdNoSpc = dropWhileEnd isSpace cs
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 92f868516..fc98213fb 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, OrgNoteRecord
, HasReaderOptions (..)
, HasQuoteContext (..)
+ , HasMacros (..)
, TodoMarker (..)
, TodoSequence
, TodoState (..)
@@ -57,14 +58,17 @@ import Control.Monad.Reader (ReaderT, asks, local)
import Data.Default (Default (..))
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Definition (Meta (..), nullMeta)
import Text.Pandoc.Logging
import Text.Pandoc.Options (ReaderOptions (..))
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..),
HasIncludeFiles (..), HasLastStrPosition (..),
HasLogMessages (..), HasQuoteContext (..),
+ HasMacros (..),
HasReaderOptions (..), ParserContext (..),
QuoteContext (..), SourcePos, askF, asksF, returnF,
runF, trimInlinesF)
@@ -118,6 +122,7 @@ data OrgParserState = OrgParserState
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
, orgLogMessages :: [LogMessage]
+ , orgMacros :: M.Map Text Macro
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@@ -148,6 +153,10 @@ instance HasLogMessages OrgParserState where
addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
getLogMessages st = reverse $ orgLogMessages st
+instance HasMacros OrgParserState where
+ extractMacros st = orgMacros st
+ updateMacros f st = st{ orgMacros = f (orgMacros st) }
+
instance HasIncludeFiles OrgParserState where
getIncludeFiles = orgStateIncludeFiles
addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st }
@@ -178,6 +187,7 @@ defaultOrgParserState = OrgParserState
, orgStateParserContext = NullState
, orgStateTodoSequences = []
, orgLogMessages = []
+ , orgMacros = M.empty
}
optionsToParserState :: ReaderOptions -> OrgParserState
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index fb5f6f2d4..2daf60a89 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,7 +31,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST ( readRST ) where
-import Control.Monad (guard, liftM, mzero, when)
+import Control.Monad (guard, liftM, mzero, when, forM_)
import Control.Monad.Identity (Identity(..))
import Control.Monad.Except (throwError)
import Data.Char (isHexDigit, isSpace, toLower, toUpper)
@@ -68,7 +68,7 @@ readRST :: PandocMonad m
-> m Pandoc
readRST opts s = do
parsed <- (readWithM parseRST) def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -170,7 +170,8 @@ parseRST = do
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
docMinusKeys <- concat <$>
- manyTill (referenceKey <|> noteBlock <|> citationBlock <|>
+ manyTill (referenceKey <|> anchorDef <|>
+ noteBlock <|> citationBlock <|>
headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
@@ -217,6 +218,7 @@ block = choice [ codeBlock
, fieldList
, include
, directive
+ , anchor
, comment
, header
, hrule
@@ -1054,16 +1056,49 @@ stripTicks = reverse . stripTick . reverse . stripTick
where stripTick ('`':xs) = xs
stripTick xs = xs
+referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames = do
+ let rn = try $ do
+ string ".. _"
+ (_, ref) <- withRaw referenceName
+ char ':'
+ return ref
+ first <- rn
+ rest <- many (try (blanklines *> rn))
+ return (first:rest)
+
regularKey :: PandocMonad m => RSTParser m ()
regularKey = try $ do
- string ".. _"
- (_,ref) <- withRaw referenceName
- char ':'
+ -- we allow several references to the same URL, e.g.
+ -- .. _hello:
+ -- .. _goodbye: url.com
+ refs <- referenceNames
src <- targetURI
- let key = toKey $ stripTicks ref
+ guard $ not (null src)
--TODO: parse width, height, class and name attributes
- updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
- stateKeys s }
+ let keys = map (toKey . stripTicks) refs
+ forM_ keys $ \key ->
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
+ stateKeys s }
+
+anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef = try $ do
+ (refs, raw) <- withRaw (try (referenceNames <* blanklines))
+ let keys = map stripTicks refs
+ forM_ keys $ \rawkey ->
+ updateState $ \s -> s { stateKeys =
+ M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ -- keep this for 2nd round of parsing, where we'll add the divs (anchor)
+ return raw
+
+anchor :: PandocMonad m => RSTParser m Blocks
+anchor = try $ do
+ refs <- referenceNames
+ blanklines
+ b <- block
+ -- put identifier on next block:
+ let addDiv ref = B.divWith (ref, [], [])
+ return $ foldr addDiv b refs
headerBlock :: PandocMonad m => RSTParser m [Char]
headerBlock = do
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 9e544c4ac..d41152de5 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -42,13 +42,13 @@ import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Text.HTML.TagSoup
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.Definition
-import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.XML (fromEntities)
+import Text.Pandoc.Shared (crFilter)
import Data.Text (Text)
import qualified Data.Text as T
@@ -59,7 +59,7 @@ readTWiki :: PandocMonad m
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case res of
Left e -> throwError e
Right d -> return d
@@ -133,12 +133,11 @@ parseTWiki = do
block :: PandocMonad m => TWParser m B.Blocks
block = do
- pos <- getPosition
res <- mempty <$ skipMany1 blankline
<|> blockElements
<|> para
skipMany blankline
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
blockElements :: PandocMonad m => TWParser m B.Blocks
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 1669e3e51..853d2768f 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -61,15 +61,14 @@ import Text.HTML.TagSoup (Tag (..), fromAttrib)
import Text.HTML.TagSoup.Match
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad(..))
import Text.Pandoc.CSS
import Text.Pandoc.Definition
-import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (trim)
+import Text.Pandoc.Shared (trim, crFilter)
import Data.Text (Text)
import qualified Data.Text as T
@@ -80,7 +79,7 @@ readTextile :: PandocMonad m
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
- (T.unpack s ++ "\n\n")
+ (T.unpack (crFilter s) ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -143,8 +142,7 @@ blockParsers = [ codeBlock
block :: PandocMonad m => ParserT [Char] ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
- pos <- getPosition
- report $ ParsingTrace (take 60 $ show $ B.toList res) pos
+ trace (take 60 $ show $ B.toList res)
return res
commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
@@ -575,7 +573,7 @@ rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- B.singleton <$> rawLaTeXInline
+ B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 260bb7fff..f000646c2 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -40,8 +40,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (macro, space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
+import Text.Pandoc.Parsing hiding (space, spaces, uri)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter)
import Control.Monad (guard, void, when)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Default
@@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
- let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n")
+ let parsed = flip runReader meta $
+ readWithM parseT2T (def {stateOptions = opts}) $
+ T.unpack (crFilter s) ++ "\n\n"
case parsed of
Right result -> return $ result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
new file mode 100644
index 000000000..52bf37d35
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -0,0 +1,673 @@
+{-
+ Copyright (C) 2017 Yuchen Pei <me@ypei.me>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Vimwiki
+ Copyright : Copyright (C) 2017 Yuchen Pei
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Yuchen Pei <me@ypei.me>
+ Stability : alpha
+ Portability : portable
+
+Conversion of vimwiki text to 'Pandoc' document.
+-}
+{--
+[X]: implemented
+[O]: not implemented
+* block parsers:
+ * [X] header
+ * [X] hrule
+ * [X] comment
+ * [X] blockquote
+ * [X] preformatted -- using codeblock
+ * [X] displaymath
+ * [X] bulletlist / orderedlist
+ * [X] todo lists -- using span.
+ * [X] table
+ * [X] centered table -- using div
+ * [O] colspan and rowspan -- see issue #1024
+ * [X] paragraph
+ * [X] definition list
+* inline parsers:
+ * [X] bareURL
+ * [X] strong
+ * [X] emph
+ * [X] strikeout
+ * [X] code
+ * [X] link
+ * [X] image
+ * [X] inline math
+ * [X] tag
+ * [X] sub- and super-scripts
+* misc:
+ * [X] `TODO:` mark
+ * [X] metadata placeholders: %title and %date
+ * [O] control placeholders: %template and %nohtml -- ignored
+--}
+
+module Text.Pandoc.Readers.Vimwiki ( readVimwiki
+ ) where
+import Control.Monad.Except (throwError)
+import Control.Monad (guard)
+import Data.Default
+import Data.Maybe
+import Data.Monoid ((<>))
+import Data.List (isInfixOf, isPrefixOf)
+import Data.Text (Text, unpack)
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList)
+import qualified Text.Pandoc.Builder
+ as B (headerWith, str, space, strong, emph, strikeout, code, link, image,
+ spanWith, para, horizontalRule, blockQuote, bulletList, plain,
+ orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith,
+ setMeta, definitionList, superscript, subscript, displayMath,
+ math)
+import Text.Pandoc.Class (PandocMonad(..))
+import Text.Pandoc.Definition (Pandoc(..), Inline(Space),
+ Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..),
+ ListNumberDelim(..))
+import Text.Pandoc.Options (ReaderOptions)
+import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState,
+ stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF,
+ orderedListMarker, many1Till)
+import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter)
+import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf,
+ alphaNum)
+import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1,
+ notFollowedBy, option)
+import Text.Parsec.Prim (many, try, updateState, getState)
+import Text.Parsec.Char (oneOf, space)
+import Text.Parsec.Combinator (lookAhead, between)
+import Text.Parsec.Prim ((<|>))
+
+readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki opts s = do
+ res <- readWithM parseVimwiki def{ stateOptions = opts }
+ (unpack (crFilter s))
+ case res of
+ Left e -> throwError e
+ Right result -> return result
+
+type VwParser = ParserT [Char] ParserState
+
+
+-- constants
+
+specialChars :: [Char]
+specialChars = "=*-#[]_~{}`$|:%^,"
+
+spaceChars :: [Char]
+spaceChars = " \t\n"
+
+-- main parser
+
+parseVimwiki :: PandocMonad m => VwParser m Pandoc
+parseVimwiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ st <- getState
+ let meta = runF (stateMeta' st) st
+ return $ Pandoc meta (toList bs)
+
+-- block parser
+
+block :: PandocMonad m => VwParser m Blocks
+block = do
+ res <- choice [ mempty <$ blanklines
+ , header
+ , hrule
+ , mempty <$ comment
+ , mixedList
+ , preformatted
+ , displayMath
+ , table
+ , mempty <$ placeholder
+ , blockQuote
+ , definitionList
+ , para
+ ]
+ trace (take 60 $ show $ toList res)
+ return res
+
+blockML :: PandocMonad m => VwParser m Blocks
+blockML = choice [preformatted, displayMath, table]
+
+header :: PandocMonad m => VwParser m Blocks
+header = try $ do
+ sp <- many spaceChar
+ eqs <- many1 (char '=')
+ spaceChar
+ let lev = length eqs
+ guard $ lev <= 6
+ contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar
+ >> (string eqs) >> many spaceChar >> newline)
+ attr <- registerHeader (makeId contents,
+ (if sp == "" then [] else ["justcenter"]), []) contents
+ return $ B.headerWith attr lev contents
+
+para :: PandocMonad m => VwParser m Blocks
+para = try $ do
+ contents <- trimInlines . mconcat <$> many1 inline
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.para contents
+
+hrule :: PandocMonad m => VwParser m Blocks
+hrule = try $ B.horizontalRule <$ (string "----" >> many (char '-') >> newline)
+
+comment :: PandocMonad m => VwParser m ()
+comment = try $ do
+ many spaceChar >> string "%%" >> many (noneOf "\n")
+ return ()
+
+blockQuote :: PandocMonad m => VwParser m Blocks
+blockQuote = try $ do
+ string " "
+ contents <- trimInlines . mconcat <$> many1 inlineBQ
+ if all (==Space) (toList contents)
+ then return mempty
+ else return $ B.blockQuote $ B.plain contents
+
+definitionList :: PandocMonad m => VwParser m Blocks
+definitionList = try $
+ B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT))
+
+dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithDT = do
+ dt <- definitionTerm
+ dds <- many definitionDef
+ return $ (dt, dds)
+
+dlItemWithoutDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
+dlItemWithoutDT = do
+ dds <- many1 definitionDef
+ return $ (mempty, dds)
+
+definitionDef :: PandocMonad m => VwParser m Blocks
+definitionDef = try $
+ (notFollowedBy definitionTerm) >> many spaceChar
+ >> (definitionDef1 <|> definitionDef2)
+
+definitionDef1 :: PandocMonad m => VwParser m Blocks
+definitionDef1 = try $ mempty <$ defMarkerE
+
+definitionDef2 :: PandocMonad m => VwParser m Blocks
+definitionDef2 = try $ B.plain <$>
+ (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline)
+
+
+definitionTerm :: PandocMonad m => VwParser m Inlines
+definitionTerm = try $ do
+ x <- definitionTerm1 <|> definitionTerm2
+ guard $ (stringify x /= "")
+ return x
+
+definitionTerm1 :: PandocMonad m => VwParser m Inlines
+definitionTerm1 = try $
+ trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE)
+
+definitionTerm2 :: PandocMonad m => VwParser m Inlines
+definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline'
+ (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM))
+
+defMarkerM :: PandocMonad m => VwParser m Char
+defMarkerM = string "::" >> spaceChar
+
+defMarkerE :: PandocMonad m => VwParser m Char
+defMarkerE = string "::" >> newline
+
+hasDefMarkerM :: PandocMonad m => VwParser m String
+hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
+
+preformatted :: PandocMonad m => VwParser m Blocks
+preformatted = try $ do
+ many spaceChar >> string "{{{"
+ attrText <- many (noneOf "\n")
+ lookAhead newline
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
+ >> many spaceChar >> newline))
+ if (not $ contents == "") && (head contents == '\n')
+ then return $ B.codeBlockWith (makeAttr attrText) (tail contents)
+ else return $ B.codeBlockWith (makeAttr attrText) contents
+
+makeAttr :: String -> Attr
+makeAttr s =
+ let xs = splitBy (`elem` " \t") s in
+ ("", [], catMaybes $ map nameValue xs)
+
+nameValue :: String -> Maybe (String, String)
+nameValue s =
+ let t = splitBy (== '=') s in
+ if length t /= 2
+ then Nothing
+ else let (a, b) = (head t, last t) in
+ if ((length b) < 2) || ((head b, last b) /= ('"', '"'))
+ then Nothing
+ else Just (a, stripFirstAndLast b)
+
+
+displayMath :: PandocMonad m => VwParser m Blocks
+displayMath = try $ do
+ many spaceChar >> string "{{$"
+ mathTag <- option "" mathTagParser
+ many space
+ contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$"
+ >> many spaceChar >> newline))
+ let contentsWithTags
+ | mathTag == "" = contents
+ | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents
+ ++ "\n\\end{" ++ mathTag ++ "}"
+ return $ B.para $ B.displayMath contentsWithTags
+
+
+mathTagLaTeX :: String -> String
+mathTagLaTeX s = case s of
+ "equation" -> ""
+ "equation*" -> ""
+ "gather" -> "gathered"
+ "gather*" -> "gathered"
+ "multline" -> "gathered"
+ "multline*" -> "gathered"
+ "eqnarray" -> "aligned"
+ "eqnarray*" -> "aligned"
+ "align" -> "aligned"
+ "align*" -> "aligned"
+ "alignat" -> "aligned"
+ "alignat*" -> "aligned"
+ _ -> s
+
+
+mixedList :: PandocMonad m => VwParser m Blocks
+mixedList = try $ do
+ (bl, _) <- mixedList' (-1)
+ return $ head bl
+
+mixedList' :: PandocMonad m => Int -> VwParser m ([Blocks], Int)
+mixedList' prevInd = do
+ (curInd, builder) <- option (-1, "na") (lookAhead listStart)
+ if curInd < prevInd
+ then return ([], curInd)
+ else do
+ listStart
+ curLine <- listItemContent
+ let listBuilder =
+ if builder == "ul" then B.bulletList else B.orderedList
+ (subList, lowInd) <- (mixedList' curInd)
+ if lowInd >= curInd
+ then do
+ (sameIndList, endInd) <- (mixedList' lowInd)
+ let curList = (combineList curLine subList) ++ sameIndList
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+ else do
+ let (curList, endInd) = ((combineList curLine subList),
+ lowInd)
+ if curInd > prevInd
+ then return ([listBuilder curList], endInd)
+ else return (curList, endInd)
+
+plainInlineML' :: PandocMonad m => Inlines -> VwParser m Blocks
+plainInlineML' w = do
+ xs <- many inlineML
+ newline
+ return $ B.plain $ trimInlines $ mconcat $ w:xs
+
+plainInlineML :: PandocMonad m => VwParser m Blocks
+plainInlineML = (notFollowedBy listStart) >> spaceChar >> plainInlineML' mempty
+
+
+listItemContent :: PandocMonad m => VwParser m Blocks
+listItemContent = try $ do
+ w <- option mempty listTodoMarker
+ x <- plainInlineML' w
+ y <- many blocksThenInline
+ z <- many blockML
+ return $ mconcat $ x:y ++ z
+
+blocksThenInline :: PandocMonad m => VwParser m Blocks
+blocksThenInline = try $ do
+ y <- many1 blockML
+ x <- plainInlineML
+ return $ mconcat $ y ++ [x]
+
+listTodoMarker :: PandocMonad m => VwParser m Inlines
+listTodoMarker = try $ do
+ x <- between (many spaceChar >> char '[') (char ']' >> spaceChar)
+ (oneOf " .oOX")
+ return $ makeListMarkerSpan x
+
+makeListMarkerSpan :: Char -> Inlines
+makeListMarkerSpan x =
+ let cl = case x of
+ ' ' -> "done0"
+ '.' -> "done1"
+ 'o' -> "done2"
+ 'O' -> "done3"
+ 'X' -> "done4"
+ _ -> ""
+ in
+ B.spanWith ("", [cl], []) mempty
+
+combineList :: Blocks -> [Blocks] -> [Blocks]
+combineList x [y] = case toList y of
+ [BulletList z] -> [fromList $ (toList x)
+ ++ [BulletList z]]
+ [OrderedList attr z] -> [fromList $ (toList x)
+ ++ [OrderedList attr z]]
+ _ -> x:[y]
+combineList x xs = x:xs
+
+listStart :: PandocMonad m => VwParser m (Int, String)
+listStart = try $ do
+ s <- many spaceChar
+ listType <- bulletListMarkers <|> orderedListMarkers
+ spaceChar
+ return (length s, listType)
+
+bulletListMarkers :: PandocMonad m => VwParser m String
+bulletListMarkers = "ul" <$ (char '*' <|> char '-')
+
+orderedListMarkers :: PandocMonad m => VwParser m String
+orderedListMarkers =
+ ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen)
+ <$> orderedListMarker
+ <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
+ <|> ("ol" <$ char '#')
+
+--many need trimInlines
+table :: PandocMonad m => VwParser m Blocks
+table = try $ do
+ indent <- lookAhead (many spaceChar)
+ (th, trs) <- table1 <|> table2
+ let tab = B.simpleTable th trs
+ if indent == ""
+ then return tab
+ else return $ B.divWith ("", ["center"], []) tab
+
+-- table with header
+table1 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table1 = try $ do
+ th <- tableRow
+ many1 tableHeaderSeparator
+ trs <- many tableRow
+ return (th, trs)
+
+-- headerless table
+table2 :: PandocMonad m => VwParser m ([Blocks], [[Blocks]])
+table2 = try $ do
+ trs <- many1 tableRow
+ return (take (length $ head trs) $ repeat mempty, trs)
+
+tableHeaderSeparator :: PandocMonad m => VwParser m ()
+tableHeaderSeparator = try $ do
+ many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|')
+ >> many spaceChar >> newline
+ return ()
+
+tableRow :: PandocMonad m => VwParser m [Blocks]
+tableRow = try $ do
+ many spaceChar >> char '|'
+ s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
+ >> newline))
+ guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
+ tr <- many tableCell
+ many spaceChar >> char '\n'
+ return tr
+
+tableCell :: PandocMonad m => VwParser m Blocks
+tableCell = try $
+ B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|'))
+
+placeholder :: PandocMonad m => VwParser m ()
+placeholder = try $
+ (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh
+
+ph :: PandocMonad m => String -> VwParser m ()
+ph s = try $ do
+ many spaceChar >> (string $ '%':s) >> spaceChar
+ contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline)))
+ --use lookAhead because of placeholder in the whitespace parser
+ let meta' = return $ B.setMeta s contents nullMeta :: F Meta
+ updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' }
+
+noHtmlPh :: PandocMonad m => VwParser m ()
+noHtmlPh = try $
+ () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
+ >> (lookAhead newline))
+
+templatePh :: PandocMonad m => VwParser m ()
+templatePh = try $
+ () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n")
+ >> (lookAhead newline))
+
+-- inline parser
+
+inline :: PandocMonad m => VwParser m Inlines
+inline = choice $ (whitespace endlineP):inlineList
+
+inlineList :: PandocMonad m => [VwParser m Inlines]
+inlineList = [ bareURL
+ , todoMark
+ , str
+ , strong
+ , emph
+ , strikeout
+ , code
+ , link
+ , image
+ , inlineMath
+ , tag
+ , superscript
+ , subscript
+ , special
+ ]
+
+-- inline parser without softbreaks or comment breaks
+inline' :: PandocMonad m => VwParser m Inlines
+inline' = choice $ whitespace':inlineList
+
+-- inline parser for blockquotes
+inlineBQ :: PandocMonad m => VwParser m Inlines
+inlineBQ = choice $ (whitespace endlineBQ):inlineList
+
+-- inline parser for mixedlists
+inlineML :: PandocMonad m => VwParser m Inlines
+inlineML = choice $ (whitespace endlineML):inlineList
+
+str :: PandocMonad m => VwParser m Inlines
+str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars)
+
+whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
+whitespace endline = B.space <$ (skipMany1 spaceChar <|>
+ (try (newline >> (comment <|> placeholder))))
+ <|> B.softbreak <$ endline
+
+whitespace' :: PandocMonad m => VwParser m Inlines
+whitespace' = B.space <$ skipMany1 spaceChar
+
+special :: PandocMonad m => VwParser m Inlines
+special = B.str <$> count 1 (oneOf specialChars)
+
+bareURL :: PandocMonad m => VwParser m Inlines
+bareURL = try $ do
+ (orig, src) <- uri <|> emailAddress
+ return $ B.link src "" (B.str orig)
+
+strong :: PandocMonad m => VwParser m Inlines
+strong = try $ do
+ s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*")
+ guard $ (not $ (head s) `elem` spaceChars)
+ && (not $ (last s) `elem` spaceChars)
+ char '*'
+ contents <- mconcat <$> (manyTill inline' $ char '*'
+ >> notFollowedBy alphaNum)
+ return $ (B.spanWith ((makeId contents), [], []) mempty)
+ <> (B.strong contents)
+
+makeId :: Inlines -> String
+makeId i = concat (stringify <$> (toList i))
+
+emph :: PandocMonad m => VwParser m Inlines
+emph = try $ do
+ s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_")
+ guard $ (not $ (head s) `elem` spaceChars)
+ && (not $ (last s) `elem` spaceChars)
+ char '_'
+ contents <- mconcat <$> (manyTill inline' $ char '_'
+ >> notFollowedBy alphaNum)
+ return $ B.emph contents
+
+strikeout :: PandocMonad m => VwParser m Inlines
+strikeout = try $ do
+ string "~~"
+ contents <- mconcat <$> (many1Till inline' $ string $ "~~")
+ return $ B.strikeout contents
+
+code :: PandocMonad m => VwParser m Inlines
+code = try $ do
+ char '`'
+ contents <- many1Till (noneOf "\n") (char '`')
+ return $ B.code contents
+
+superscript :: PandocMonad m => VwParser m Inlines
+superscript = try $
+ B.superscript <$> mconcat <$> (char '^' >> many1Till inline' (char '^'))
+
+subscript :: PandocMonad m => VwParser m Inlines
+subscript = try $
+ B.subscript <$> mconcat <$> (string ",,"
+ >> many1Till inline' (try $ string ",,"))
+
+link :: PandocMonad m => VwParser m Inlines
+link = try $ do
+ string "[["
+ contents <- lookAhead $ manyTill anyChar (string "]]")
+ case '|' `elem` contents of
+ False -> do
+ manyTill anyChar (string "]]")
+-- not using try here because [[hell]o]] is not rendered as a link in vimwiki
+ return $ B.link (procLink contents) "" (B.str contents)
+ True -> do
+ url <- manyTill anyChar $ char '|'
+ lab <- mconcat <$> (manyTill inline $ string "]]")
+ return $ B.link (procLink url) "" lab
+
+image :: PandocMonad m => VwParser m Inlines
+image = try $ do
+ string "{{"
+ contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}")
+ images $ length $ filter (== '|') contentText
+
+images :: PandocMonad m => Int -> VwParser m Inlines
+images k
+ | k == 0 = do
+ imgurl <- manyTill anyChar (try $ string "}}")
+ return $ B.image (procImgurl imgurl) "" (B.str "")
+ | k == 1 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ (try $ string "}}"))
+ return $ B.image (procImgurl imgurl) "" alt
+ | k == 2 = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ char '|')
+ attrText <- manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+ | otherwise = do
+ imgurl <- manyTill anyChar (char '|')
+ alt <- mconcat <$> (manyTill inline $ char '|')
+ attrText <- manyTill anyChar (char '|')
+ manyTill anyChar (try $ string "}}")
+ return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
+
+procLink' :: String -> String
+procLink' s
+ | ((take 6 s) == "local:") = "file" ++ (drop 5 s)
+ | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html"
+ | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+ "news:", "telnet:" ])
+ = s
+ | s == "" = ""
+ | (last s) == '/' = s
+ | otherwise = s ++ ".html"
+
+procLink :: String -> String
+procLink s = procLink' x ++ y
+ where (x, y) = break (=='#') s
+
+procImgurl :: String -> String
+procImgurl s = if ((take 6 s) == "local:") then "file" ++ (drop 5 s) else s
+
+inlineMath :: PandocMonad m => VwParser m Inlines
+inlineMath = try $ do
+ char '$'
+ contents <- many1Till (noneOf "\n") (char '$')
+ return $ B.math contents
+
+tag :: PandocMonad m => VwParser m Inlines
+tag = try $ do
+ char ':'
+ s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space)))
+ guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
+ let ss = splitBy (==':') s
+ return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss))
+
+todoMark :: PandocMonad m => VwParser m Inlines
+todoMark = try $ do
+ string "TODO:"
+ return $ B.spanWith ("", ["todo"], []) (B.str "TODO:")
+
+-- helper functions and parsers
+endlineP :: PandocMonad m => VwParser m ()
+endlineP = () <$ try (newline <* nFBTTBSB <* notFollowedBy blockQuote)
+
+endlineBQ :: PandocMonad m => VwParser m ()
+endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ")
+
+endlineML :: PandocMonad m => VwParser m ()
+endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar)
+
+--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks
+nFBTTBSB :: PandocMonad m => VwParser m ()
+nFBTTBSB =
+ notFollowedBy newline <*
+ notFollowedBy hrule <*
+ notFollowedBy tableRow <*
+ notFollowedBy header <*
+ notFollowedBy listStart <*
+ notFollowedBy preformatted <*
+ notFollowedBy displayMath <*
+ notFollowedBy hasDefMarker
+
+hasDefMarker :: PandocMonad m => VwParser m ()
+hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars))
+
+makeTagSpan' :: String -> Inlines
+makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
+ B.spanWith (s, ["tag"], []) (B.str s)
+
+makeTagSpan :: String -> Inlines
+makeTagSpan s = (B.space) <> (makeTagSpan' s)
+
+mathTagParser :: PandocMonad m => VwParser m String
+mathTagParser = do
+ s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars)
+ (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)))
+ char '%' >> string s >> char '%'
+ return $ mathTagLaTeX s