aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorYan Pas <yanp.bugz@gmail.com>2018-10-07 18:10:01 +0300
committerYan Pas <yanp.bugz@gmail.com>2018-10-07 18:10:01 +0300
commit27467189ab184c5d098e244e01f7d1bfdb0d4d45 (patch)
treed1fb96ebbc49ee0c4e73ef354feddd521690d545 /src/Text/Pandoc/Readers
parent4f3dd3b1af7217214287ab886147c5e33a54774d (diff)
parentbd8a66394bc25b52dca9ffd963a560a4ca492f9c (diff)
downloadpandoc-27467189ab184c5d098e244e01f7d1bfdb0d4d45.tar.gz
Merge branch 'master' into groff_reader
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs64
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs2
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs66
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs5
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs13
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs53
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs14
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1474
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs173
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs668
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs6
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs192
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs514
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs4
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs8
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs139
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs14
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs15
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs4
-rw-r--r--src/Text/Pandoc/Readers/RST.hs68
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs111
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs9
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs18
34 files changed, 2090 insertions, 1595 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 79a4abbc2..9c4f7a8ac 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -39,10 +39,12 @@ import Control.Monad.State
import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
import Data.List (groupBy)
import qualified Data.Map as Map
+import Data.Maybe (mapMaybe)
import Data.Text (Text, unpack)
+import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Options
import Text.Pandoc.Shared (stringify)
import Text.Pandoc.Walk (walkM)
@@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM)
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
(if isEnabled Ext_gfm_auto_identifiers opts
- then addHeaderIdentifiers
+ then addHeaderIdentifiers opts
else id) $
nodeToPandoc opts $ commonmarkToNode opts' exts s
where opts' = [ optSmart | isEnabled Ext_smart opts ]
@@ -59,24 +61,27 @@ readCommonMark opts s = return $
[ extTable | isEnabled Ext_pipe_tables opts ] ++
[ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
-convertEmojis :: String -> String
-convertEmojis (':':xs) =
+convertEmojis :: String -> [Inline]
+convertEmojis s@(':':xs) =
case break (==':') xs of
(ys,':':zs) ->
- case Map.lookup ys emojis of
- Just s -> s ++ convertEmojis zs
- Nothing -> ':' : ys ++ convertEmojis (':':zs)
- _ -> ':':xs
-convertEmojis (x:xs) = x : convertEmojis xs
-convertEmojis [] = []
-
-addHeaderIdentifiers :: Pandoc -> Pandoc
-addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
-
-addHeaderId :: Block -> State (Map.Map String Int) Block
-addHeaderId (Header lev (_,classes,kvs) ils) = do
+ case emojiToInline ys of
+ Just em -> em : convertEmojis zs
+ Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+ _ -> [Str s]
+convertEmojis s =
+ case break (==':') s of
+ ("","") -> []
+ (_,"") -> [Str s]
+ (xs,ys) -> Str xs:convertEmojis ys
+
+addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
+addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
+
+addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId opts (Header lev (_,classes,kvs) ils) = do
idmap <- get
- let ident = toIdent ils
+ let ident = toIdent opts ils
ident' <- case Map.lookup ident idmap of
Nothing -> do
put (Map.insert ident 1 idmap)
@@ -85,13 +90,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do
put (Map.adjust (+ 1) ident idmap)
return (ident ++ "-" ++ show i)
return $ Header lev (ident',classes,kvs) ils
-addHeaderId x = return x
+addHeaderId _ x = return x
-toIdent :: [Inline] -> String
-toIdent = map (\c -> if isSpace c then '-' else c)
- . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
- c == '_' || c == '-')
- . map toLower . stringify
+toIdent :: ReaderOptions -> [Inline] -> String
+toIdent opts = map (\c -> if isSpace c then '-' else c)
+ . filterer
+ . map toLower . stringify
+ where filterer = if isEnabled Ext_ascii_identifiers opts
+ then mapMaybe toAsciiChar
+ else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+ c == '_' || c == '-')
nodeToPandoc :: ReaderOptions -> Node -> Pandoc
nodeToPandoc opts (Node _ DOCUMENT nodes) =
@@ -200,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline]
addInlines opts = foldr (addInline opts) []
addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
-addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str $ if isEnabled Ext_emoji opts
- then convertEmojis xs
- else xs
+ toinl (' ':_) = [Space]
+ toinl xs = if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else [Str xs]
addInline _ (Node _ LINEBREAK _) = (LineBreak :)
addInline opts (Node _ SOFTBREAK _)
| isEnabled Ext_hard_line_breaks opts = (LineBreak :)
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 4fd38c0fd..a337bf937 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -2,7 +2,7 @@
{-
Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de>
- partly based on all the other readers, especialy the work by
+ partly based on all the other readers, especially the work by
John MacFarlane <jgm@berkeley.edu> and
Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
all bugs are solely created by me.
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3d48c7ee8..b7bd71754 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -537,7 +537,6 @@ type DB m = StateT DBState m
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
, dbMeta :: Meta
- , dbAcceptsMeta :: Bool
, dbBook :: Bool
, dbFigureTitle :: Inlines
, dbContent :: [Content]
@@ -547,7 +546,6 @@ instance Default DBState where
def = DBState{ dbSectionLevel = 0
, dbQuoteType = DoubleQuote
, dbMeta = mempty
- , dbAcceptsMeta = False
, dbBook = False
, dbFigureTitle = mempty
, dbContent = [] }
@@ -609,18 +607,26 @@ named s e = qName (elName e) == s
--
-acceptingMetadata :: PandocMonad m => DB m a -> DB m a
-acceptingMetadata p = do
- modify (\s -> s { dbAcceptsMeta = True } )
- res <- p
- modify (\s -> s { dbAcceptsMeta = False })
- return res
-
-checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
-checkInMeta p = do
- accepts <- dbAcceptsMeta <$> get
- when accepts p
- return mempty
+addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks
+addMetadataFromElement e = do
+ case filterChild (named "title") e of
+ Nothing -> return ()
+ Just z -> do
+ getInlines z >>= addMeta "title"
+ addMetaField "subtitle" z
+ case filterChild (named "authorgroup") e of
+ Nothing -> return ()
+ Just z -> addMetaField "author" z
+ addMetaField "subtitle" e
+ addMetaField "author" e
+ addMetaField "date" e
+ addMetaField "release" e
+ return mempty
+ where addMetaField fieldname elt =
+ case filterChildren (named fieldname) elt of
+ [] -> return ()
+ [z] -> getInlines z >>= addMeta fieldname
+ zs -> mapM getInlines zs >>= addMeta fieldname
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
addMeta field val = modify (setMeta field val)
@@ -718,11 +724,6 @@ parseBlock (Elem e) =
"attribution" -> return mempty
"titleabbrev" -> return mempty
"authorinitials" -> return mempty
- "title" -> checkInMeta getTitle
- "author" -> checkInMeta getAuthor
- "authorgroup" -> checkInMeta getAuthorGroup
- "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
- "date" -> checkInMeta getDate
"bibliography" -> sect 0
"bibliodiv" -> sect 1
"biblioentry" -> parseMixed para (elContent e)
@@ -788,8 +789,8 @@ parseBlock (Elem e) =
"figure" -> getFigure e
"mediaobject" -> para <$> getMediaobject e
"caption" -> return mempty
- "info" -> metaBlock
- "articleinfo" -> metaBlock
+ "info" -> addMetadataFromElement e
+ "articleinfo" -> addMetadataFromElement e
"sectioninfo" -> return mempty -- keywords & other metadata
"refsectioninfo" -> return mempty -- keywords & other metadata
"refsect1info" -> return mempty -- keywords & other metadata
@@ -803,10 +804,11 @@ parseBlock (Elem e) =
"chapterinfo" -> return mempty -- keywords & other metadata
"glossaryinfo" -> return mempty -- keywords & other metadata
"appendixinfo" -> return mempty -- keywords & other metadata
- "bookinfo" -> metaBlock
+ "bookinfo" -> addMetadataFromElement e
"article" -> modify (\st -> st{ dbBook = False }) >>
- getBlocks e
- "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
+ addMetadataFromElement e >> getBlocks e
+ "book" -> modify (\st -> st{ dbBook = True }) >>
+ addMetadataFromElement e >> getBlocks e
"table" -> parseTable
"informaltable" -> parseTable
"informalexample" -> divWith ("", ["informalexample"], []) <$>
@@ -816,6 +818,8 @@ parseBlock (Elem e) =
"screen" -> codeBlockWithLang
"programlisting" -> codeBlockWithLang
"?xml" -> return mempty
+ "title" -> return mempty -- handled in parent element
+ "subtitle" -> return mempty -- handled in parent element
_ -> getBlocks e
where parseMixed container conts = do
let (ils,rest) = break isBlockElement conts
@@ -857,19 +861,6 @@ parseBlock (Elem e) =
terms' <- mapM getInlines terms
items' <- mapM getBlocks items
return (mconcat $ intersperse (str "; ") terms', items')
- getTitle = do
- tit <- getInlines e
- subtit <- case filterChild (named "subtitle") e of
- Just s -> (text ": " <>) <$>
- getInlines s
- Nothing -> return mempty
- addMeta "title" (tit <> subtit)
-
- getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
- getAuthorGroup = do
- let terms = filterChildren (named "author") e
- mapM getInlines terms >>= addMeta "author"
- getDate = getInlines e >>= addMeta "date"
parseTable = do
let isCaption x = named "title" x || named "caption" x
caption <- case filterChild isCaption e of
@@ -935,7 +926,6 @@ parseBlock (Elem e) =
modify $ \st -> st{ dbSectionLevel = n - 1 }
return $ headerWith (ident,[],[]) n' headerText <> b
lineItems = mapM getInlines $ filterChildren (named "line") e
- metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = (trimInlines . mconcat) <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 49ea71601..0be363f3d 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -183,14 +183,13 @@ blocksToDefinitions' defAcc acc
pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
in
blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
+blocksToDefinitions' ((defTerm, defItems):defs) acc
(Div (ident2, classes2, kvs2) blks2 : blks)
- | (not . null) defAcc && "Definition" `elem` classes2 =
+ | "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
defItems2 = case remainingAttr2 == ("", [], []) of
True -> blks2
False -> [Div remainingAttr2 blks2]
- ((defTerm, defItems):defs) = defAcc
defAcc' = case null defItems of
True -> (defTerm, [defItems2]) : defs
False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 4c4c06073..b4e52de14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -785,7 +785,7 @@ So we do this in a number of steps. If we encounter the fldchar begin
tag, we start open a fldchar state variable (see state above). We add
the instrtext to it as FieldInfo. Then we close that and start adding
the runs when we get to separate. Then when we get to end, we produce
-the Field type with approriate FieldInfo and Runs.
+the Field type with appropriate FieldInfo and Runs.
-}
elemToParPart ns element
| isElem ns "w" "r" element
@@ -1056,8 +1056,10 @@ elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
- isBold = checkOnOff ns rPr (elemName ns "w" "b")
- , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
+ isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "bCs")
+ , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "iCs")
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, rVertAlign =
@@ -1153,8 +1155,9 @@ getSymChar :: NameSpaces -> Element -> RunElem
getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
- let [(char, _)] = readLitChar ("\\x" ++ s) in
- TextRun . maybe "" (:[]) $ getUnicode font char
+ case readLitChar ("\\x" ++ s) of
+ [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ _ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
getFont = stringToFont =<< findAttrByName ns "w" "font" element
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index c26447641..bfc3fc3ee 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -73,7 +73,7 @@ readEPUB opts bytes = case toArchiveOrFail bytes of
-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept
--- Note that internal reference are aggresively normalised so that all ids
+-- Note that internal reference are aggressively normalised so that all ids
-- are of the form "filename#id"
--
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32a1ba5a6..b06e07a80 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
+import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
- Ext_native_spans, Ext_raw_html, Ext_line_blocks),
+ Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
@@ -102,7 +104,8 @@ readHtml opts inp = do
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+ (HTMLState def{ stateOptions = opts }
+ [] Nothing Set.empty M.empty [] M.empty)
"source" tags
case result of
Right doc -> return doc
@@ -124,7 +127,8 @@ data HTMLState =
baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String,
- logMessages :: [LogMessage]
+ logMessages :: [LogMessage],
+ macros :: M.Map Text Macro
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -659,6 +663,7 @@ inline = choice
, pCode
, pSpan
, pMath False
+ , pScriptMath
, pRawHtmlInline
]
@@ -745,18 +750,18 @@ pLink = try $ do
let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
maybeFromAttrib "id" tag
let cls = words $ T.unpack $ fromAttrib "class" tag
- lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+ lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
Nothing ->
- return $ B.spanWith (uid, cls, []) lab
+ return $ extractSpaces (B.spanWith (uid, cls, [])) lab
Just url' -> do
mbBaseHref <- baseHref <$> getState
let url = case (parseURIReference url', mbBaseHref) of
(Just rel, Just bs) ->
show (rel `nonStrictRelativeTo` bs)
_ -> url'
- return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+ return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
@@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)]
toStringAttr = map go
where go (x,y) = (T.unpack x, T.unpack y)
+pScriptMath :: PandocMonad m => TagParser m Inlines
+pScriptMath = try $ do
+ TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True)
+ isdisplay <- case lookup "type" attr' of
+ Just x | "math/tex" `T.isPrefixOf` x
+ -> return $ "display" `T.isSuffixOf` x
+ _ -> mzero
+ contents <- T.unpack . innerText <$>
+ manyTill pAnyTag (pSatisfy (matchTagClose "script"))
+ return $ (if isdisplay then B.displayMath else B.math) contents
+
pMath :: PandocMonad m => Bool -> TagParser m Inlines
pMath inCase = try $ do
open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True)
@@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do
pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
--- parses p, preceeded by an optional opening tag
+-- parses p, preceded by an optional opening tag
-- and followed by an optional closing tags
pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a
pOptInTag tagtype p = try $ do
@@ -907,9 +923,25 @@ pTagContents =
<|> pStr
<|> pSpace
<|> smartPunctuation pTagContents
+ <|> pRawTeX
<|> pSymbol
<|> pBad
+pRawTeX :: PandocMonad m => InlinesParser m Inlines
+pRawTeX = do
+ lookAhead $ try $ do
+ char '\\'
+ choice $ map (try . string) ["begin", "eqref", "ref"]
+ guardEnabled Ext_raw_tex
+ inp <- getInput
+ st <- getState
+ res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+ case res of
+ Left _ -> mzero
+ Right (contents, raw) -> do
+ _ <- count (length raw) anyChar
+ return $ B.rawInline "tex" contents
+
pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
@@ -923,6 +955,7 @@ isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
isSpecial '$' = True
+isSpecial '\\' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
@@ -1249,6 +1282,10 @@ isSpace _ = False
-- Instances
+instance HasMacros HTMLState where
+ extractMacros = macros
+ updateMacros f st = st{ macros = f $ macros st }
+
instance HasIdentifierList HTMLState where
extractIdentifierList = identifiers
updateIdentifierList f s = s{ identifiers = f (identifiers s) }
@@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState
--- For now we need a special verison here; the one in Shared has String type
+-- For now we need a special version here; the one in Shared has String type
renderTags' :: [Tag Text] -> Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 967037e4e..072bab350 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -44,11 +44,7 @@ readHaddockEither :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Either PandocError Pandoc
readHaddockEither _opts =
-#if MIN_VERSION_haddock_library(1,2,0)
- Right . B.doc . docHToBlocks . _doc . parseParas
-#else
- Right . B.doc . docHToBlocks . parseParas
-#endif
+ Right . B.doc . docHToBlocks . _doc . parseParas Nothing
docHToBlocks :: DocH String Identifier -> Blocks
docHToBlocks d' =
@@ -68,10 +64,8 @@ docHToBlocks d' =
DocEmphasis _ -> inlineFallback
DocMonospaced _ -> inlineFallback
DocBold _ -> inlineFallback
-#if MIN_VERSION_haddock_library(1,4,0)
DocMathInline _ -> inlineFallback
DocMathDisplay _ -> inlineFallback
-#endif
DocHeader h -> B.header (headerLevel h)
(docHToInlines False $ headerTitle h)
DocUnorderedList items -> B.bulletList (map docHToBlocks items)
@@ -87,7 +81,6 @@ docHToBlocks d' =
DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
DocExamples es -> mconcat $ map (\e ->
makeExample ">>>" (exampleExpression e) (exampleResult e)) es
-#if MIN_VERSION_haddock_library(1,5,0)
DocTable H.Table{ tableHeaderRows = headerRows
, tableBodyRows = bodyRows
}
@@ -100,7 +93,6 @@ docHToBlocks d' =
colspecs = replicate (maximum (map length body))
(AlignDefault, 0.0)
in B.table mempty colspecs header body
-#endif
where inlineFallback = B.plain $ docHToInlines False d'
consolidatePlains = B.fromList . consolidatePlains' . B.toList
@@ -133,10 +125,8 @@ docHToInlines isCode d' =
DocMonospaced (DocString s) -> B.code s
DocMonospaced d -> docHToInlines True d
DocBold d -> B.strong (docHToInlines isCode d)
-#if MIN_VERSION_haddock_library(1,4,0)
DocMathInline s -> B.math s
DocMathDisplay s -> B.displayMath s
-#endif
DocHeader _ -> mempty
DocUnorderedList _ -> mempty
DocOrderedList _ -> mempty
@@ -149,9 +139,7 @@ docHToInlines isCode d' =
DocAName s -> B.spanWith (s,["anchor"],[]) mempty
DocProperty _ -> mempty
DocExamples _ -> mempty
-#if MIN_VERSION_haddock_library(1,5,0)
DocTable _ -> mempty
-#endif
-- | Create an 'Example', stripping superfluous characters as appropriate
makeExample :: String -> String -> [String] -> Blocks
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 59af76d23..695c86b5d 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -191,7 +191,7 @@ parseBlock (Elem e) =
listType -> do
let start = fromMaybe 1 $
(strContent <$> (filterElement (named "list-item") e
- >>= filterElement (named "lable")))
+ >>= filterElement (named "label")))
>>= safeRead
orderedListWith (start, parseListStyleType listType, DefaultDelim)
<$> listitems
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 39dffde76..7c5619165 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Prelude
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Control.Monad.Trans (lift)
-import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper)
+import Data.Char (isDigit, isLetter, toLower, toUpper)
import Data.Default
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
@@ -63,7 +62,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
readFileFromDirs, report, setResourcePath,
setTranslations, translateTerm, trace)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError))
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
@@ -71,12 +70,15 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
- Tok (..), TokType (..))
+ ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+ babelLangToBCP47)
import Text.Pandoc.Shared
import qualified Text.Pandoc.Translations as Translations
import Text.Pandoc.Walk
-import Text.Parsec.Pos
import qualified Text.Pandoc.Builder as B
+import qualified Data.Text.Normalize as Normalize
-- for debugging:
-- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -137,482 +139,49 @@ resolveRefs _ x = x
-- Left e -> error (show e)
-- Right r -> return r
-newtype HeaderNum = HeaderNum [Int]
- deriving (Show)
-
-renderHeaderNum :: HeaderNum -> String
-renderHeaderNum (HeaderNum xs) =
- intercalate "." (map show xs)
-
-incrementHeaderNum :: Int -> HeaderNum -> HeaderNum
-incrementHeaderNum level (HeaderNum ns) = HeaderNum $
- case reverse (take level (ns ++ repeat 0)) of
- (x:xs) -> reverse (x+1 : xs)
- [] -> [] -- shouldn't happen
-
-data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
- , sMeta :: Meta
- , sQuoteContext :: QuoteContext
- , sMacros :: M.Map Text Macro
- , sContainers :: [String]
- , sHeaders :: M.Map Inlines String
- , sLogMessages :: [LogMessage]
- , sIdentifiers :: Set.Set String
- , sVerbatimMode :: Bool
- , sCaption :: Maybe Inlines
- , sInListItem :: Bool
- , sInTableCell :: Bool
- , sLastHeaderNum :: HeaderNum
- , sLabels :: M.Map String [Inline]
- , sHasChapters :: Bool
- , sToggles :: M.Map String Bool
- }
- deriving Show
-
-defaultLaTeXState :: LaTeXState
-defaultLaTeXState = LaTeXState{ sOptions = def
- , sMeta = nullMeta
- , sQuoteContext = NoQuote
- , sMacros = M.empty
- , sContainers = []
- , sHeaders = M.empty
- , sLogMessages = []
- , sIdentifiers = Set.empty
- , sVerbatimMode = False
- , sCaption = Nothing
- , sInListItem = False
- , sInTableCell = False
- , sLastHeaderNum = HeaderNum []
- , sLabels = M.empty
- , sHasChapters = False
- , sToggles = M.empty
- }
-
-instance PandocMonad m => HasQuoteContext LaTeXState m where
- getQuoteContext = sQuoteContext <$> getState
- withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = sQuoteContext oldState
- setState oldState { sQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { sQuoteContext = oldQuoteContext }
- return result
-
-instance HasLogMessages LaTeXState where
- addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
- getLogMessages st = reverse $ sLogMessages st
-
-instance HasIdentifierList LaTeXState where
- extractIdentifierList = sIdentifiers
- updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
-
-instance HasIncludeFiles LaTeXState where
- getIncludeFiles = sContainers
- addIncludeFile f s = s{ sContainers = f : sContainers s }
- dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
-
-instance HasHeaderMap LaTeXState where
- extractHeaderMap = sHeaders
- updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
-
-instance HasMacros LaTeXState where
- extractMacros st = sMacros st
- updateMacros f st = st{ sMacros = f (sMacros st) }
-
-instance HasReaderOptions LaTeXState where
- extractReaderOptions = sOptions
-
-instance HasMeta LaTeXState where
- setMeta field val st =
- st{ sMeta = setMeta field val $ sMeta st }
- deleteMeta field st =
- st{ sMeta = deleteMeta field $ sMeta st }
-
-instance Default LaTeXState where
- def = defaultLaTeXState
-
-type LP m = ParserT [Tok] LaTeXState m
-
-withVerbatimMode :: PandocMonad m => LP m a -> LP m a
-withVerbatimMode parser = do
- updateState $ \st -> st{ sVerbatimMode = True }
- result <- parser
- updateState $ \st -> st{ sVerbatimMode = False }
- return result
-
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => LP m a -> LP m a -> ParserT String s m (a, String)
-rawLaTeXParser parser valParser = do
- inp <- getInput
- let toks = tokenize "source" $ T.pack inp
- pstate <- getState
- let lstate = def{ sOptions = extractReaderOptions pstate }
- let lstate' = lstate { sMacros = extractMacros pstate }
- let rawparser = (,) <$> withRaw valParser <*> getState
- res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
- case res' of
- Left _ -> mzero
- Right toks' -> do
- res <- lift $ runParserT (do doMacros 0
- -- retokenize, applying macros
- ts <- many (satisfyTok (const True))
- setInput ts
- rawparser)
- lstate' "chunk" toks'
- case res of
- Left _ -> mzero
- Right ((val, raw), st) -> do
- updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
- return (val, T.unpack (untokenize raw))
-
-applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => String -> ParserT String s m String
-applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = doMacros 0 *>
- (toksToString <$> many (satisfyTok (const True)))
- pstate <- getState
- let lstate = def{ sOptions = extractReaderOptions pstate
- , sMacros = extractMacros pstate }
- res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
- case res of
- Left e -> fail (show e)
- Right s' -> return s'
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
- -- we don't want to apply newly defined latex macros to their own
- -- definitions:
- snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks
+ snd <$> (rawLaTeXParser False macroDef blocks
+ <|> (rawLaTeXParser True
+ (do choice (map controlSeq
+ ["include", "input", "subfile", "usepackage"])
+ skipMany opt
+ braced
+ return mempty) blocks)
+ <|> rawLaTeXParser True
+ (environment <|> blockCommand)
+ (mconcat <$> (many (block <|> beginOrEndCommand))))
+
+-- See #4667 for motivation; sometimes people write macros
+-- that just evaluate to a begin or end command, which blockCommand
+-- won't accept.
+beginOrEndCommand :: PandocMonad m => LP m Blocks
+beginOrEndCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name == "begin" || name == "end"
+ (envname, rawargs) <- withRaw braced
+ if M.member (untokenize envname)
+ (inlineEnvironments :: M.Map Text (LP PandocPure Inlines))
+ then mzero
+ else return $ rawBlock "latex"
+ (T.unpack (txt <> untokenize rawargs))
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
- snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
+ snd <$> ( rawLaTeXParser True
+ (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+ inlines
+ <|> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines)
inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
- fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
-
-tokenize :: SourceName -> Text -> [Tok]
-tokenize sourcename = totoks (initialPos sourcename)
-
-totoks :: SourcePos -> Text -> [Tok]
-totoks pos t =
- case T.uncons t of
- Nothing -> []
- Just (c, rest)
- | c == '\n' ->
- Tok pos Newline "\n"
- : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
- | isSpaceOrTab c ->
- let (sps, rest') = T.span isSpaceOrTab t
- in Tok pos Spaces sps
- : totoks (incSourceColumn pos (T.length sps))
- rest'
- | isAlphaNum c ->
- let (ws, rest') = T.span isAlphaNum t
- in Tok pos Word ws
- : totoks (incSourceColumn pos (T.length ws)) rest'
- | c == '%' ->
- let (cs, rest') = T.break (== '\n') rest
- in Tok pos Comment ("%" <> cs)
- : totoks (incSourceColumn pos (1 + T.length cs)) rest'
- | c == '\\' ->
- case T.uncons rest of
- Nothing -> [Tok pos (CtrlSeq " ") "\\"]
- Just (d, rest')
- | isLetterOrAt d ->
- -- \makeatletter is common in macro defs;
- -- ideally we should make tokenization sensitive
- -- to \makeatletter and \makeatother, but this is
- -- probably best for now
- let (ws, rest'') = T.span isLetterOrAt rest
- (ss, rest''') = T.span isSpaceOrTab rest''
- in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
- : totoks (incSourceColumn pos
- (1 + T.length ws + T.length ss)) rest'''
- | isSpaceOrTab d || d == '\n' ->
- let (w1, r1) = T.span isSpaceOrTab rest
- (w2, (w3, r3)) = case T.uncons r1 of
- Just ('\n', r2)
- -> (T.pack "\n",
- T.span isSpaceOrTab r2)
- _ -> (mempty, (mempty, r1))
- ws = "\\" <> w1 <> w2 <> w3
- in case T.uncons r3 of
- Just ('\n', _) ->
- Tok pos (CtrlSeq " ") ("\\" <> w1)
- : totoks (incSourceColumn pos (T.length ws))
- r1
- _ ->
- Tok pos (CtrlSeq " ") ws
- : totoks (incSourceColumn pos (T.length ws))
- r3
- | otherwise ->
- Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
- : totoks (incSourceColumn pos 2) rest'
- | c == '#' ->
- let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
- in case safeRead (T.unpack t1) of
- Just i ->
- Tok pos (Arg i) ("#" <> t1)
- : totoks (incSourceColumn pos (1 + T.length t1)) t2
- Nothing ->
- Tok pos Symbol "#"
- : totoks (incSourceColumn pos 1) t2
- | c == '^' ->
- case T.uncons rest of
- Just ('^', rest') ->
- case T.uncons rest' of
- Just (d, rest'')
- | isLowerHex d ->
- case T.uncons rest'' of
- Just (e, rest''') | isLowerHex e ->
- Tok pos Esc2 (T.pack ['^','^',d,e])
- : totoks (incSourceColumn pos 4) rest'''
- _ ->
- Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
- | d < '\128' ->
- Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
- _ -> Tok pos Symbol "^" :
- Tok (incSourceColumn pos 1) Symbol "^" :
- totoks (incSourceColumn pos 2) rest'
- _ -> Tok pos Symbol "^"
- : totoks (incSourceColumn pos 1) rest
- | otherwise ->
- Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
-
-isSpaceOrTab :: Char -> Bool
-isSpaceOrTab ' ' = True
-isSpaceOrTab '\t' = True
-isSpaceOrTab _ = False
-
-isLetterOrAt :: Char -> Bool
-isLetterOrAt '@' = True
-isLetterOrAt c = isLetter c
-
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-
-untokenize :: [Tok] -> Text
-untokenize = mconcat . map untoken
-
-untoken :: Tok -> Text
-untoken (Tok _ _ t) = t
-
-satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
-satisfyTok f =
- try $ do
- res <- tokenPrim (T.unpack . untoken) updatePos matcher
- doMacros 0 -- apply macros on remaining input stream
- return res
- where matcher t | f t = Just t
- | otherwise = Nothing
- updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
- updatePos _spos _ (Tok pos _ _ : _) = pos
- updatePos spos _ [] = incSourceColumn spos 1
-
-doMacros :: PandocMonad m => Int -> LP m ()
-doMacros n = do
- verbatimMode <- sVerbatimMode <$> getState
- unless verbatimMode $ do
- inp <- getInput
- case inp of
- Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos name ts
- Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos ("end" <> name) ts
- Tok _ (CtrlSeq "expandafter") _ : t : ts
- -> do setInput ts
- doMacros n
- getInput >>= setInput . combineTok t
- Tok spos (CtrlSeq name) _ : ts
- -> handleMacros spos name ts
- _ -> return ()
- where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
- | T.all isLetterOrAt w =
- Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
- where (x1, x2) = T.break isSpaceOrTab x
- combineTok t ts = t:ts
- handleMacros spos name ts = do
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Nothing -> return ()
- Just (Macro expansionPoint numargs optarg newtoks) -> do
- setInput ts
- let getarg = try $ spaces >> bracedOrToken
- args <- case optarg of
- Nothing -> count numargs getarg
- Just o ->
- (:) <$> option o bracketedToks
- <*> count (numargs - 1) getarg
- -- first boolean param is true if we're tokenizing
- -- an argument (in which case we don't want to
- -- expand #1 etc.)
- let addTok False (Tok _ (Arg i) _) acc | i > 0
- , i <= numargs =
- foldr (addTok True) acc (args !! (i - 1))
- -- add space if needed after control sequence
- -- see #4007
- addTok _ (Tok _ (CtrlSeq x) txt)
- acc@(Tok _ Word _ : _)
- | not (T.null txt) &&
- isLetter (T.last txt) =
- Tok spos (CtrlSeq x) (txt <> " ") : acc
- addTok _ t acc = setpos spos t : acc
- ts' <- getInput
- setInput $ foldr (addTok False) ts' newtoks
- case expansionPoint of
- ExpandWhenUsed ->
- if n > 20 -- detect macro expansion loops
- then throwError $ PandocMacroLoop (T.unpack name)
- else doMacros (n + 1)
- ExpandWhenDefined -> return ()
-
-
-setpos :: SourcePos -> Tok -> Tok
-setpos spos (Tok _ tt txt) = Tok spos tt txt
-
-anyControlSeq :: PandocMonad m => LP m Tok
-anyControlSeq = satisfyTok isCtrlSeq
- where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
- isCtrlSeq _ = False
-
-anySymbol :: PandocMonad m => LP m Tok
-anySymbol = satisfyTok isSym
- where isSym (Tok _ Symbol _) = True
- isSym _ = False
-
-spaces :: PandocMonad m => LP m ()
-spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-spaces1 :: PandocMonad m => LP m ()
-spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-tokTypeIn :: [TokType] -> Tok -> Bool
-tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
-
-controlSeq :: PandocMonad m => Text -> LP m Tok
-controlSeq name = satisfyTok isNamed
- where isNamed (Tok _ (CtrlSeq n) _) = n == name
- isNamed _ = False
-
-symbol :: PandocMonad m => Char -> LP m Tok
-symbol c = satisfyTok isc
- where isc (Tok _ Symbol d) = case T.uncons d of
- Just (c',_) -> c == c'
- _ -> False
- isc _ = False
-
-symbolIn :: PandocMonad m => [Char] -> LP m Tok
-symbolIn cs = satisfyTok isInCs
- where isInCs (Tok _ Symbol d) = case T.uncons d of
- Just (c,_) -> c `elem` cs
- _ -> False
- isInCs _ = False
-
-sp :: PandocMonad m => LP m ()
-sp = whitespace <|> endline
-
-whitespace :: PandocMonad m => LP m ()
-whitespace = () <$ satisfyTok isSpaceTok
- where isSpaceTok (Tok _ Spaces _) = True
- isSpaceTok _ = False
-
-newlineTok :: PandocMonad m => LP m ()
-newlineTok = () <$ satisfyTok isNewlineTok
-
-isNewlineTok :: Tok -> Bool
-isNewlineTok (Tok _ Newline _) = True
-isNewlineTok _ = False
-
-comment :: PandocMonad m => LP m ()
-comment = () <$ satisfyTok isCommentTok
- where isCommentTok (Tok _ Comment _) = True
- isCommentTok _ = False
-
-anyTok :: PandocMonad m => LP m Tok
-anyTok = satisfyTok (const True)
-
-endline :: PandocMonad m => LP m ()
-endline = try $ do
- newlineTok
- lookAhead anyTok
- notFollowedBy blankline
-
-blankline :: PandocMonad m => LP m ()
-blankline = try $ skipMany whitespace *> newlineTok
-
-primEscape :: PandocMonad m => LP m Char
-primEscape = do
- Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
- case toktype of
- Esc1 -> case T.uncons (T.drop 2 t) of
- Just (c, _)
- | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
- | otherwise -> return (chr (ord c + 64))
- Nothing -> fail "Empty content of Esc1"
- Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
- Just x -> return (chr x)
- Nothing -> fail $ "Could not read: " ++ T.unpack t
- _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
-
-bgroup :: PandocMonad m => LP m Tok
-bgroup = try $ do
- skipMany sp
- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-
-egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
-
-grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
-grouped parser = try $ do
- bgroup
- -- first we check for an inner 'grouped', because
- -- {{a,b}} should be parsed the same as {a,b}
- try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m [Tok]
-braced = bgroup *> braced' 1
- where braced' (n :: Int) =
- handleEgroup n <|> handleBgroup n <|> handleOther n
- handleEgroup n = do
- t <- egroup
- if n == 1
- then return []
- else (t:) <$> braced' (n - 1)
- handleBgroup n = do
- t <- bgroup
- (t:) <$> braced' (n + 1)
- handleOther n = do
- t <- anyTok
- (t:) <$> braced' n
-
-bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ do
- symbol '['
- mconcat <$> manyTill parser (symbol ']')
-
-dimenarg :: PandocMonad m => LP m Text
-dimenarg = try $ do
- ch <- option False $ True <$ symbol '='
- Tok _ _ s <- satisfyTok isWordTok
- guard $ T.take 2 (T.reverse s) `elem`
- ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- let num = T.take (T.length s - 2) s
- guard $ T.length num > 0
- guard $ T.all isDigit num
- return $ T.pack ['=' | ch] <> s
+ fst <$> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines
-- inline elements:
@@ -625,13 +194,6 @@ regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
-specialChars :: Set.Set Char
-specialChars = Set.fromList "#$%&~_^\\{}"
-
-isWordTok :: Tok -> Bool
-isWordTok (Tok _ Word _) = True
-isWordTok _ = False
-
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
@@ -678,7 +240,7 @@ dosiunitx = do
skipopts
value <- tok
valueprefix <- option "" $ bracketed tok
- unit <- inlineCommand' <|> tok
+ unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
let emptyOr160 "" = ""
emptyOr160 _ = "\160"
return . mconcat $ [valueprefix,
@@ -687,11 +249,187 @@ dosiunitx = do
emptyOr160 unit,
unit]
--- siunitx's \square command
-dosquare :: PandocMonad m => LP m Inlines
-dosquare = do
- unit <- inlineCommand' <|> tok
- return . mconcat $ [unit, "\178"]
+siUnit :: PandocMonad m => LP m Inlines
+siUnit = do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ if name == "square"
+ then do
+ unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
+ return . mconcat $ [unit, "\178"]
+ else
+ case M.lookup name siUnitMap of
+ Just il -> return il
+ Nothing -> mzero
+
+siUnitMap :: M.Map Text Inlines
+siUnitMap = M.fromList
+ [ ("fg", str "fg")
+ , ("pg", str "pg")
+ , ("ng", str "ng")
+ , ("ug", str "μg")
+ , ("mg", str "mg")
+ , ("g", str "g")
+ , ("kg", str "kg")
+ , ("amu", str "u")
+ , ("pm", str "pm")
+ , ("nm", str "nm")
+ , ("um", str "μm")
+ , ("mm", str "mm")
+ , ("cm", str "cm")
+ , ("dm", str "dm")
+ , ("m", str "m")
+ , ("km", str "km")
+ , ("as", str "as")
+ , ("fs", str "fs")
+ , ("ps", str "ps")
+ , ("ns", str "ns")
+ , ("us", str "μs")
+ , ("ms", str "ms")
+ , ("s", str "s")
+ , ("fmol", str "fmol")
+ , ("pmol", str "pmol")
+ , ("nmol", str "nmol")
+ , ("umol", str "μmol")
+ , ("mmol", str "mmol")
+ , ("mol", str "mol")
+ , ("kmol", str "kmol")
+ , ("pA", str "pA")
+ , ("nA", str "nA")
+ , ("uA", str "μA")
+ , ("mA", str "mA")
+ , ("A", str "A")
+ , ("kA", str "kA")
+ , ("ul", str "μl")
+ , ("ml", str "ml")
+ , ("l", str "l")
+ , ("hl", str "hl")
+ , ("uL", str "μL")
+ , ("mL", str "mL")
+ , ("L", str "L")
+ , ("hL", str "hL")
+ , ("mHz", str "mHz")
+ , ("Hz", str "Hz")
+ , ("kHz", str "kHz")
+ , ("MHz", str "MHz")
+ , ("GHz", str "GHz")
+ , ("THz", str "THz")
+ , ("mN", str "mN")
+ , ("N", str "N")
+ , ("kN", str "kN")
+ , ("MN", str "MN")
+ , ("Pa", str "Pa")
+ , ("kPa", str "kPa")
+ , ("MPa", str "MPa")
+ , ("GPa", str "GPa")
+ , ("mohm", str "mΩ")
+ , ("kohm", str "kΩ")
+ , ("Mohm", str "MΩ")
+ , ("pV", str "pV")
+ , ("nV", str "nV")
+ , ("uV", str "μV")
+ , ("mV", str "mV")
+ , ("V", str "V")
+ , ("kV", str "kV")
+ , ("W", str "W")
+ , ("uW", str "μW")
+ , ("mW", str "mW")
+ , ("kW", str "kW")
+ , ("MW", str "MW")
+ , ("GW", str "GW")
+ , ("J", str "J")
+ , ("uJ", str "μJ")
+ , ("mJ", str "mJ")
+ , ("kJ", str "kJ")
+ , ("eV", str "eV")
+ , ("meV", str "meV")
+ , ("keV", str "keV")
+ , ("MeV", str "MeV")
+ , ("GeV", str "GeV")
+ , ("TeV", str "TeV")
+ , ("kWh", str "kWh")
+ , ("F", str "F")
+ , ("fF", str "fF")
+ , ("pF", str "pF")
+ , ("K", str "K")
+ , ("dB", str "dB")
+ , ("angstrom", str "Å")
+ , ("arcmin", str "′")
+ , ("arcminute", str "′")
+ , ("arcsecond", str "″")
+ , ("astronomicalunit", str "ua")
+ , ("atomicmassunit", str "u")
+ , ("atto", str "a")
+ , ("bar", str "bar")
+ , ("barn", str "b")
+ , ("becquerel", str "Bq")
+ , ("bel", str "B")
+ , ("candela", str "cd")
+ , ("celsius", str "°C")
+ , ("centi", str "c")
+ , ("coulomb", str "C")
+ , ("dalton", str "Da")
+ , ("day", str "d")
+ , ("deca", str "d")
+ , ("deci", str "d")
+ , ("decibel", str "db")
+ , ("degreeCelsius",str "°C")
+ , ("degree", str "°")
+ , ("deka", str "d")
+ , ("electronvolt", str "eV")
+ , ("exa", str "E")
+ , ("farad", str "F")
+ , ("femto", str "f")
+ , ("giga", str "G")
+ , ("gram", str "g")
+ , ("hectare", str "ha")
+ , ("hecto", str "h")
+ , ("henry", str "H")
+ , ("hertz", str "Hz")
+ , ("hour", str "h")
+ , ("joule", str "J")
+ , ("katal", str "kat")
+ , ("kelvin", str "K")
+ , ("kilo", str "k")
+ , ("kilogram", str "kg")
+ , ("knot", str "kn")
+ , ("liter", str "L")
+ , ("litre", str "l")
+ , ("lumen", str "lm")
+ , ("lux", str "lx")
+ , ("mega", str "M")
+ , ("meter", str "m")
+ , ("metre", str "m")
+ , ("milli", str "m")
+ , ("minute", str "min")
+ , ("mmHg", str "mmHg")
+ , ("mole", str "mol")
+ , ("nano", str "n")
+ , ("nauticalmile", str "M")
+ , ("neper", str "Np")
+ , ("newton", str "N")
+ , ("ohm", str "Ω")
+ , ("Pa", str "Pa")
+ , ("pascal", str "Pa")
+ , ("percent", str "%")
+ , ("per", str "/")
+ , ("peta", str "P")
+ , ("pico", str "p")
+ , ("radian", str "rad")
+ , ("second", str "s")
+ , ("siemens", str "S")
+ , ("sievert", str "Sv")
+ , ("steradian", str "sr")
+ , ("tera", str "T")
+ , ("tesla", str "T")
+ , ("tonne", str "t")
+ , ("volt", str "V")
+ , ("watt", str "W")
+ , ("weber", str "Wb")
+ , ("yocto", str "y")
+ , ("yotta", str "Y")
+ , ("zepto", str "z")
+ , ("zetta", str "Z")
+ ]
lit :: String -> LP m Inlines
lit = pure . str
@@ -742,13 +480,31 @@ quoted' f starter ender = do
cs -> cs)
else lit startchs
-enquote :: PandocMonad m => LP m Inlines
-enquote = do
+enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
+enquote starred mblang = do
skipopts
+ let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let langspan = case lang of
+ Nothing -> id
+ Just l -> spanWith ("",[],[("lang", renderLang l)])
quoteContext <- sQuoteContext <$> getState
- if quoteContext == InDoubleQuote
- then singleQuoted <$> withQuoteContext InSingleQuote tok
- else doubleQuoted <$> withQuoteContext InDoubleQuote tok
+ if starred || quoteContext == InDoubleQuote
+ then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
+blockquote citations mblang = do
+ citePar <- if citations
+ then do
+ cs <- cites NormalCitation False
+ return $ para (cite cs mempty)
+ else return mempty
+ let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let langdiv = case lang of
+ Nothing -> id
+ Just l -> divWith ("",[],[("lang", renderLang l)])
+ bs <- grouped block
+ return $ blockQuote . langdiv $ (bs <> citePar)
doAcronym :: PandocMonad m => String -> LP m Inlines
doAcronym form = do
@@ -791,6 +547,16 @@ dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
options <- option [] keyvals
let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
+ doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+ skipopts
+ cls <- toksToString <$> braced
+ doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [String] -> LP m Inlines
+doinlinecode classes = do
Tok _ Symbol t <- anySymbol
marker <- case T.uncons t of
Just (c, ts) | T.null ts -> return c
@@ -803,246 +569,41 @@ dolstinline = do
keyval :: PandocMonad m => LP m (String, String)
keyval = try $ do
Tok _ Word key <- satisfyTok isWordTok
- let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= ","
- isSpecSym _ = False
optional sp
- val <- option [] $ do
+ val <- option mempty $ do
symbol '='
optional sp
- braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
- <|> anyControlSeq)
- optional sp
+ (untokenize <$> braced) <|>
+ (mconcat <$> many1 (
+ (untokenize . snd <$> withRaw braced)
+ <|>
+ (untokenize <$> (many1
+ (satisfyTok
+ (\t -> case t of
+ Tok _ Symbol "]" -> False
+ Tok _ Symbol "," -> False
+ Tok _ Symbol "{" -> False
+ Tok _ Symbol "}" -> False
+ _ -> True))))))
optional (symbol ',')
optional sp
- return (T.unpack key, T.unpack . untokenize $ val)
+ return (T.unpack key, T.unpack $ T.strip val)
keyvals :: PandocMonad m => LP m [(String, String)]
keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
-accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines
-accent c f = try $ do
+accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
+accent combiningAccent fallBack = try $ do
ils <- tok
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
- [Space] -> return $ str [c]
- [] -> return $ str [c]
+ (Str (x:xs) : ys) -> return $ fromList $
+ -- try to normalize to the combined character:
+ Str (T.unpack
+ (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent])) ++ xs) : ys
+ [Space] -> return $ str [fromMaybe combiningAccent fallBack]
+ [] -> return $ str [fromMaybe combiningAccent fallBack]
_ -> return ils
-
-grave :: Char -> String
-grave 'A' = "À"
-grave 'E' = "È"
-grave 'I' = "Ì"
-grave 'O' = "Ò"
-grave 'U' = "Ù"
-grave 'a' = "à"
-grave 'e' = "è"
-grave 'i' = "ì"
-grave 'o' = "ò"
-grave 'u' = "ù"
-grave c = [c]
-
-acute :: Char -> String
-acute 'A' = "Á"
-acute 'E' = "É"
-acute 'I' = "Í"
-acute 'O' = "Ó"
-acute 'U' = "Ú"
-acute 'Y' = "Ý"
-acute 'a' = "á"
-acute 'e' = "é"
-acute 'i' = "í"
-acute 'o' = "ó"
-acute 'u' = "ú"
-acute 'y' = "ý"
-acute 'C' = "Ć"
-acute 'c' = "ć"
-acute 'L' = "Ĺ"
-acute 'l' = "ĺ"
-acute 'N' = "Ń"
-acute 'n' = "ń"
-acute 'R' = "Ŕ"
-acute 'r' = "ŕ"
-acute 'S' = "Ś"
-acute 's' = "ś"
-acute 'Z' = "Ź"
-acute 'z' = "ź"
-acute c = [c]
-
-circ :: Char -> String
-circ 'A' = "Â"
-circ 'E' = "Ê"
-circ 'I' = "Î"
-circ 'O' = "Ô"
-circ 'U' = "Û"
-circ 'a' = "â"
-circ 'e' = "ê"
-circ 'i' = "î"
-circ 'o' = "ô"
-circ 'u' = "û"
-circ 'C' = "Ĉ"
-circ 'c' = "ĉ"
-circ 'G' = "Ĝ"
-circ 'g' = "ĝ"
-circ 'H' = "Ĥ"
-circ 'h' = "ĥ"
-circ 'J' = "Ĵ"
-circ 'j' = "ĵ"
-circ 'S' = "Ŝ"
-circ 's' = "ŝ"
-circ 'W' = "Ŵ"
-circ 'w' = "ŵ"
-circ 'Y' = "Ŷ"
-circ 'y' = "ŷ"
-circ c = [c]
-
-tilde :: Char -> String
-tilde 'A' = "Ã"
-tilde 'a' = "ã"
-tilde 'O' = "Õ"
-tilde 'o' = "õ"
-tilde 'I' = "Ĩ"
-tilde 'i' = "ĩ"
-tilde 'U' = "Ũ"
-tilde 'u' = "ũ"
-tilde 'N' = "Ñ"
-tilde 'n' = "ñ"
-tilde c = [c]
-
-umlaut :: Char -> String
-umlaut 'A' = "Ä"
-umlaut 'E' = "Ë"
-umlaut 'I' = "Ï"
-umlaut 'O' = "Ö"
-umlaut 'U' = "Ü"
-umlaut 'a' = "ä"
-umlaut 'e' = "ë"
-umlaut 'i' = "ï"
-umlaut 'o' = "ö"
-umlaut 'u' = "ü"
-umlaut c = [c]
-
-hungarumlaut :: Char -> String
-hungarumlaut 'A' = "A̋"
-hungarumlaut 'E' = "E̋"
-hungarumlaut 'I' = "I̋"
-hungarumlaut 'O' = "Ő"
-hungarumlaut 'U' = "Ű"
-hungarumlaut 'Y' = "ӳ"
-hungarumlaut 'a' = "a̋"
-hungarumlaut 'e' = "e̋"
-hungarumlaut 'i' = "i̋"
-hungarumlaut 'o' = "ő"
-hungarumlaut 'u' = "ű"
-hungarumlaut 'y' = "ӳ"
-hungarumlaut c = [c]
-
-dot :: Char -> String
-dot 'C' = "Ċ"
-dot 'c' = "ċ"
-dot 'E' = "Ė"
-dot 'e' = "ė"
-dot 'G' = "Ġ"
-dot 'g' = "ġ"
-dot 'I' = "İ"
-dot 'Z' = "Ż"
-dot 'z' = "ż"
-dot c = [c]
-
-macron :: Char -> String
-macron 'A' = "Ā"
-macron 'E' = "Ē"
-macron 'I' = "Ī"
-macron 'O' = "Ō"
-macron 'U' = "Ū"
-macron 'a' = "ā"
-macron 'e' = "ē"
-macron 'i' = "ī"
-macron 'o' = "ō"
-macron 'u' = "ū"
-macron c = [c]
-
-cedilla :: Char -> String
-cedilla 'c' = "ç"
-cedilla 'C' = "Ç"
-cedilla 's' = "ş"
-cedilla 'S' = "Ş"
-cedilla 't' = "ţ"
-cedilla 'T' = "Ţ"
-cedilla 'e' = "ȩ"
-cedilla 'E' = "Ȩ"
-cedilla 'h' = "ḩ"
-cedilla 'H' = "Ḩ"
-cedilla 'o' = "o̧"
-cedilla 'O' = "O̧"
-cedilla c = [c]
-
-hacek :: Char -> String
-hacek 'A' = "Ǎ"
-hacek 'a' = "ǎ"
-hacek 'C' = "Č"
-hacek 'c' = "č"
-hacek 'D' = "Ď"
-hacek 'd' = "ď"
-hacek 'E' = "Ě"
-hacek 'e' = "ě"
-hacek 'G' = "Ǧ"
-hacek 'g' = "ǧ"
-hacek 'H' = "Ȟ"
-hacek 'h' = "ȟ"
-hacek 'I' = "Ǐ"
-hacek 'i' = "ǐ"
-hacek 'j' = "ǰ"
-hacek 'K' = "Ǩ"
-hacek 'k' = "ǩ"
-hacek 'L' = "Ľ"
-hacek 'l' = "ľ"
-hacek 'N' = "Ň"
-hacek 'n' = "ň"
-hacek 'O' = "Ǒ"
-hacek 'o' = "ǒ"
-hacek 'R' = "Ř"
-hacek 'r' = "ř"
-hacek 'S' = "Š"
-hacek 's' = "š"
-hacek 'T' = "Ť"
-hacek 't' = "ť"
-hacek 'U' = "Ǔ"
-hacek 'u' = "ǔ"
-hacek 'Z' = "Ž"
-hacek 'z' = "ž"
-hacek c = [c]
-
-ogonek :: Char -> String
-ogonek 'a' = "ą"
-ogonek 'e' = "ę"
-ogonek 'o' = "ǫ"
-ogonek 'i' = "į"
-ogonek 'u' = "ų"
-ogonek 'A' = "Ą"
-ogonek 'E' = "Ę"
-ogonek 'I' = "Į"
-ogonek 'O' = "Ǫ"
-ogonek 'U' = "Ų"
-ogonek c = [c]
-
-breve :: Char -> String
-breve 'A' = "Ă"
-breve 'a' = "ă"
-breve 'E' = "Ĕ"
-breve 'e' = "ĕ"
-breve 'G' = "Ğ"
-breve 'g' = "ğ"
-breve 'I' = "Ĭ"
-breve 'i' = "ĭ"
-breve 'O' = "Ŏ"
-breve 'o' = "ŏ"
-breve 'U' = "Ŭ"
-breve 'u' = "ŭ"
-breve c = [c]
-
-toksToString :: [Tok] -> String
-toksToString = T.unpack . untokenize
-
mathDisplay :: String -> Inlines
mathDisplay = displayMath . trim
@@ -1119,7 +680,21 @@ citationLabel = do
cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
cites mode multi = try $ do
cits <- if multi
- then many1 simpleCiteArgs
+ then do
+ multiprenote <- optionMaybe $ toList <$> paropt
+ multipostnote <- optionMaybe $ toList <$> paropt
+ let (pre, suf) = case (multiprenote, multipostnote) of
+ (Just s , Nothing) -> (mempty, s)
+ (Nothing , Just t) -> (mempty, t)
+ (Just s , Just t ) -> (s, t)
+ _ -> (mempty, mempty)
+ tempCits <- many1 simpleCiteArgs
+ case tempCits of
+ (k:ks) -> case ks of
+ (_:_) -> return $ ((addMprenote pre k):init ks) ++
+ [addMpostnote suf (last ks)]
+ _ -> return [addMprenote pre (addMpostnote suf k)]
+ _ -> return [[]]
else count 1 simpleCiteArgs
let cs = concat cits
return $ case mode of
@@ -1127,6 +702,17 @@ cites mode multi = try $ do
(c:rest) -> c {citationMode = mode} : rest
[] -> []
_ -> map (\a -> a {citationMode = mode}) cs
+ where mprenote (k:ks) = (k:ks) ++ [Space]
+ mprenote _ = mempty
+ mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+ mpostnote _ = mempty
+ addMprenote mpn (k:ks) =
+ let mpnfinal = case citationPrefix k of
+ (_:_) -> mprenote mpn
+ _ -> mpn
+ in addPrefix mpnfinal (k:ks)
+ addMprenote _ _ = []
+ addMpostnote = addSuffix . mpostnote
citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
@@ -1181,22 +767,12 @@ tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
Tok _ _ t <- singleChar
return (str (T.unpack t))
-singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
- Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
- guard $ not $ toktype == Symbol &&
- T.any (`Set.member` specialChars) t
- if T.length t > 1
- then do
- let (t1, t2) = (T.take 1 t, T.drop 1 t)
- inp <- getInput
- setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
- return $ Tok pos toktype t1
- else return $ Tok pos toktype t
-
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline <|> (str . T.unpack <$> rawopt)
+paropt :: PandocMonad m => LP m Inlines
+paropt = parenWrapped inline
+
rawopt :: PandocMonad m => LP m Text
rawopt = do
inner <- untokenize <$> bracketedToks
@@ -1204,30 +780,28 @@ rawopt = do
return $ "[" <> inner <> "]"
skipopts :: PandocMonad m => LP m ()
-skipopts = skipMany rawopt
+skipopts = skipMany (overlaySpecification <|> void rawopt)
-- opts in angle brackets are used in beamer
-rawangle :: PandocMonad m => LP m ()
-rawangle = try $ do
+overlaySpecification :: PandocMonad m => LP m ()
+overlaySpecification = try $ do
symbol '<'
- () <$ manyTill anyTok (symbol '>')
-
-skipangles :: PandocMonad m => LP m ()
-skipangles = skipMany rawangle
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
-withRaw parser = do
- inp <- getInput
- result <- parser
- nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
- let raw = takeWhile (/= nxt) inp
- return (result, raw)
+ ts <- manyTill overlayTok (symbol '>')
+ guard $ case ts of
+ -- see issue #3368
+ [Tok _ Word s] | T.all isLetter s -> s `elem`
+ ["beamer","presentation", "trans",
+ "handout","article", "second"]
+ _ -> True
+
+overlayTok :: PandocMonad m => LP m Tok
+overlayTok =
+ satisfyTok (\t ->
+ case t of
+ Tok _ Word _ -> True
+ Tok _ Spaces _ -> True
+ Tok _ Symbol c -> c `elem` ["-","+","@","|",":",","]
+ _ -> False)
inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
@@ -1275,6 +849,12 @@ inlineEnvironments = M.fromList [
, ("align*", mathEnvWith id (Just "aligned") "align*")
, ("alignat", mathEnvWith id (Just "aligned") "alignat")
, ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ , ("dmath", mathEnvWith id Nothing "dmath")
+ , ("dmath*", mathEnvWith id Nothing "dmath*")
+ , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+ , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+ , ("darray", mathEnvWith id (Just "aligned") "darray")
+ , ("darray*", mathEnvWith id (Just "aligned") "darray*")
]
inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -1289,7 +869,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
, ("texttt", ttfamily)
, ("sout", extractSpaces strikeout <$> tok)
- , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+ , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
, ("lq", return (str "‘"))
, ("rq", return (str "’"))
, ("textquoteleft", return (str "‘"))
@@ -1318,7 +898,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
, ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
, ("ensuremath", mathInline . toksToString <$> braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("texorpdfstring", (\x _ -> x) <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
, ("$", lit "$")
@@ -1361,20 +941,32 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("copyright", lit "©")
, ("textasciicircum", lit "^")
, ("textasciitilde", lit "~")
- , ("H", accent '\779' hungarumlaut)
- , ("`", accent '`' grave)
- , ("'", accent '\'' acute)
- , ("^", accent '^' circ)
- , ("~", accent '~' tilde)
- , ("\"", accent '\776' umlaut)
- , (".", accent '\775' dot)
- , ("=", accent '\772' macron)
- , ("c", accent '\807' cedilla)
- , ("v", accent 'ˇ' hacek)
- , ("u", accent '\774' breve)
- , ("k", accent '\808' ogonek)
- , ("textogonekcentered", accent '\808' ogonek)
- , ("i", lit "i")
+ , ("H", accent '\779' Nothing) -- hungarumlaut
+ , ("`", accent '\768' (Just '`')) -- grave
+ , ("'", accent '\769' (Just '\'')) -- acute
+ , ("^", accent '\770' (Just '^')) -- circ
+ , ("~", accent '\771' (Just '~')) -- tilde
+ , ("\"", accent '\776' Nothing) -- umlaut
+ , (".", accent '\775' Nothing) -- dot
+ , ("=", accent '\772' Nothing) -- macron
+ , ("|", accent '\781' Nothing) -- vertical line above
+ , ("b", accent '\817' Nothing) -- macron below
+ , ("c", accent '\807' Nothing) -- cedilla
+ , ("G", accent '\783' Nothing) -- doublegrave
+ , ("h", accent '\777' Nothing) -- hookabove
+ , ("d", accent '\803' Nothing) -- dotbelow
+ , ("f", accent '\785' Nothing) -- inverted breve
+ , ("r", accent '\778' Nothing) -- ringabove
+ , ("t", accent '\865' Nothing) -- double inverted breve
+ , ("U", accent '\782' Nothing) -- double vertical line above
+ , ("v", accent '\780' Nothing) -- hacek
+ , ("u", accent '\774' Nothing) -- breve
+ , ("k", accent '\808' Nothing) -- ogonek
+ , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+ , ("i", lit "ı") -- dotless i
+ , ("j", lit "ȷ") -- dotless j
+ , ("newtie", accent '\785' Nothing) -- inverted breve
+ , ("textcircled", accent '\8413' Nothing) -- combining circle
, ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
guard $ not inTableCell
optional opt
@@ -1392,17 +984,25 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("footnote", skipopts >> note <$> grouped block)
, ("verb", doverb)
, ("lstinline", dolstinline)
+ , ("mintinline", domintinline)
, ("Verb", doverb)
- , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
pure (link url "" (str url)))
, ("href", (unescapeURL . toksToString <$>
- braced <* optional sp) >>= \url ->
+ bracedUrl <* optional sp) >>= \url ->
tok >>= \lab -> pure (link url "" lab))
, ("includegraphics", do options <- option [] keyvals
src <- unescapeURL . T.unpack .
removeDoubleQuotes . untokenize <$> braced
mkImage options src)
- , ("enquote", enquote)
+ , ("enquote*", enquote True Nothing)
+ , ("enquote", enquote False Nothing)
+ -- foreignquote is supposed to use native quote marks
+ , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+ , ("foreignquote", braced >>= enquote False . Just . untokenize)
+ -- hypehnquote uses regular quotes
+ , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+ , ("hyphenquote", braced >>= enquote False . Just . untokenize)
, ("figurename", doTerm Translations.Figure)
, ("prefacename", doTerm Translations.Preface)
, ("refname", doTerm Translations.References)
@@ -1507,13 +1107,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("acsp", doAcronymPlural "abbrv")
-- siuntix
, ("SI", dosiunitx)
- -- units of siuntix
- , ("celsius", lit "°C")
- , ("degreeCelsius", lit "°C")
- , ("gram", lit "g")
- , ("meter", lit "m")
- , ("milli", lit "m")
- , ("square", dosquare)
-- hyphenat
, ("bshyp", lit "\\\173")
, ("fshyp", lit "/\173")
@@ -1542,8 +1135,18 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("Rn", romanNumeralLower)
-- babel
, ("foreignlanguage", foreignlanguage)
+ -- include
+ , ("input", include "input")
+ -- plain tex stuff that should just be passed through as raw tex
+ , ("ifdim", ifdim)
]
+ifdim :: PandocMonad m => LP m Inlines
+ifdim = do
+ contents <- manyTill anyTok (controlSeq "fi")
+ return $ rawInline "latex" $ T.unpack $
+ "\\ifdim" <> untokenize contents <> "\\fi"
+
makeUppercase :: Inlines -> Inlines
makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
@@ -1693,7 +1296,6 @@ getRawCommand name txt = do
"def" ->
void $ manyTill anyTok braced
_ -> do
- skipangles
skipopts
option "" (try (optional sp *> dimenarg))
void $ many braced
@@ -1818,7 +1420,6 @@ end_ t = try (do
preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$ many preambleBlock
where preambleBlock = spaces1
- <|> void include
<|> void macroDef
<|> void blockCommand
<|> void braced
@@ -1831,11 +1432,8 @@ paragraph = do
then return mempty
else return $ para x
-include :: PandocMonad m => LP m Blocks
-include = do
- (Tok _ (CtrlSeq name) _) <-
- controlSeq "include" <|> controlSeq "input" <|>
- controlSeq "subfile" <|> controlSeq "usepackage"
+include :: (PandocMonad m, Monoid a) => Text -> LP m a
+include name = do
skipMany opt
fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," .
untokenize) <$> braced
@@ -1912,31 +1510,28 @@ letmacro = do
optional $ symbol '='
spaces
contents <- bracedOrToken
- return (name, Macro ExpandWhenDefined 0 Nothing contents)
+ return (name, Macro ExpandWhenDefined [] Nothing contents)
defmacro :: PandocMonad m => LP m (Text, Macro)
defmacro = try $ do
controlSeq "def"
Tok _ (CtrlSeq name) _ <- anyControlSeq
- numargs <- option 0 $ argSeq 1
+ argspecs <- many (argspecArg <|> argspecPattern)
-- we use withVerbatimMode, because macros are to be expanded
-- at point of use, not point of definition
contents <- withVerbatimMode bracedOrToken
- return (name, Macro ExpandWhenUsed numargs Nothing contents)
+ return (name, Macro ExpandWhenUsed argspecs Nothing contents)
--- Note: we don't yet support fancy things like #1.#2
-argSeq :: PandocMonad m => Int -> LP m Int
-argSeq n = do
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
Tok _ (Arg i) _ <- satisfyTok isArgTok
- guard $ i == n
- argSeq (n+1) <|> return n
+ return $ ArgNum i
-isArgTok :: Tok -> Bool
-isArgTok (Tok _ (Arg _) _) = True
-isArgTok _ = False
-
-bracedOrToken :: PandocMonad m => LP m [Tok]
-bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+ Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+ (toktype' == Symbol || toktype' == Word) &&
+ (txt /= "{" && txt /= "\\" && txt /= "}")))
newcommand :: PandocMonad m => LP m (Text, Macro)
newcommand = do
@@ -1950,6 +1545,7 @@ newcommand = do
(symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
spaces
numargs <- option 0 $ try bracketedNum
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
spaces
optarg <- option Nothing $ Just <$> try bracketedToks
spaces
@@ -1959,7 +1555,7 @@ newcommand = do
case M.lookup name macros of
Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
Nothing -> return ()
- return (name, Macro ExpandWhenUsed numargs optarg contents)
+ return (name, Macro ExpandWhenUsed argspecs optarg contents)
newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
newenvironment = do
@@ -1972,6 +1568,7 @@ newenvironment = do
name <- untokenize <$> braced
spaces
numargs <- option 0 $ try bracketedNum
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
spaces
optarg <- option Nothing $ Just <$> try bracketedToks
spaces
@@ -1983,13 +1580,8 @@ newenvironment = do
case M.lookup name macros of
Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
Nothing -> return ()
- return (name, Macro ExpandWhenUsed numargs optarg startcontents,
- Macro ExpandWhenUsed 0 Nothing endcontents)
-
-bracketedToks :: PandocMonad m => LP m [Tok]
-bracketedToks = do
- symbol '['
- mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+ return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
+ Macro ExpandWhenUsed [] Nothing endcontents)
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
@@ -2003,11 +1595,13 @@ setCaption = do
ils <- tok
mblabel <- option Nothing $
try $ spaces >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ sCaption = Just ils' }
+ let capt = case mblabel of
+ Just lab -> let slab = stringify lab
+ ils' = ils <> spanWith
+ ("",[],[("label", slab)]) mempty
+ in (Just ils', Just slab)
+ Nothing -> (Just ils, Nothing)
+ updateState $ \st -> st{ sCaption = capt }
return mempty
looseItem :: PandocMonad m => LP m Blocks
@@ -2018,28 +1612,27 @@ looseItem = do
return mempty
resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+resetCaption = updateState $ \st -> st{ sCaption = (Nothing, Nothing) }
-section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks
-section starred (ident, classes, kvs) lvl = do
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
skipopts
contents <- grouped inline
lab <- option ident $
try (spaces >> controlSeq "label"
>> spaces >> toksToString <$> braced)
- let classes' = if starred then "unnumbered" : classes else classes
when (lvl == 0) $
updateState $ \st -> st{ sHasChapters = True }
- unless starred $ do
+ unless ("unnumbered" `elem` classes) $ do
hn <- sLastHeaderNum <$> getState
hasChapters <- sHasChapters <$> getState
let lvl' = lvl + if hasChapters then 1 else 0
- let num = incrementHeaderNum lvl' hn
- updateState $ \st -> st{ sLastHeaderNum = num }
- updateState $ \st -> st{ sLabels = M.insert lab
- [Str (renderHeaderNum num)]
- (sLabels st) }
- attr' <- registerHeader (lab, classes', kvs) contents
+ let num = incrementDottedNum lvl' hn
+ updateState $ \st -> st{ sLastHeaderNum = num
+ , sLabels = M.insert lab
+ [Str (renderDottedNum num)]
+ (sLabels st) }
+ attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl contents
blockCommand :: PandocMonad m => LP m Blocks
@@ -2100,23 +1693,23 @@ blockCommands = M.fromList
-- Koma-script metadata commands
, ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
-- sectioning
- , ("part", section False nullAttr (-1))
- , ("part*", section True nullAttr (-1))
- , ("chapter", section False nullAttr 0)
- , ("chapter*", section True ("",["unnumbered"],[]) 0)
- , ("section", section False nullAttr 1)
- , ("section*", section True ("",["unnumbered"],[]) 1)
- , ("subsection", section False nullAttr 2)
- , ("subsection*", section True ("",["unnumbered"],[]) 2)
- , ("subsubsection", section False nullAttr 3)
- , ("subsubsection*", section True ("",["unnumbered"],[]) 3)
- , ("paragraph", section False nullAttr 4)
- , ("paragraph*", section True ("",["unnumbered"],[]) 4)
- , ("subparagraph", section False nullAttr 5)
- , ("subparagraph*", section True ("",["unnumbered"],[]) 5)
+ , ("part", section nullAttr (-1))
+ , ("part*", section nullAttr (-1))
+ , ("chapter", section nullAttr 0)
+ , ("chapter*", section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
-- beamer slides
- , ("frametitle", section False nullAttr 3)
- , ("framesubtitle", section False nullAttr 4)
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
-- letters
, ("opening", (para . trimInlines) <$> (skipopts *> tok))
, ("closing", skipopts *> closing)
@@ -2152,6 +1745,18 @@ blockCommands = M.fromList
-- LaTeX colors
, ("textcolor", coloredBlock "color")
, ("colorbox", coloredBlock "background-color")
+ -- csquotes
+ , ("blockquote", blockquote False Nothing)
+ , ("blockcquote", blockquote True Nothing)
+ , ("foreignblockquote", braced >>= blockquote False . Just . untokenize)
+ , ("foreignblockcquote", braced >>= blockquote True . Just . untokenize)
+ , ("hyphenblockquote", braced >>= blockquote False . Just . untokenize)
+ , ("hyphenblockcquote", braced >>= blockquote True . Just . untokenize)
+ -- include
+ , ("include", include "include")
+ , ("input", include "input")
+ , ("subfile", include "subfile")
+ , ("usepackage", include "usepackage")
]
@@ -2192,6 +1797,7 @@ environments = M.fromList
, ("minted", minted)
, ("obeylines", obeylines)
, ("tikzpicture", rawVerbEnv "tikzpicture")
+ , ("lilypond", rawVerbEnv "lilypond")
-- etoolbox
, ("ifstrequal", ifstrequal)
, ("newtoggle", braced >>= newToggle)
@@ -2234,7 +1840,7 @@ rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{tikzpicture}" ++ toksToString raw
+ let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -2248,7 +1854,20 @@ verbEnv name = withVerbatimMode $ do
skipopts
optional blankline
res <- manyTill anyTok (end_ name)
- return $ stripTrailingNewlines $ toksToString res
+ return $ T.unpack
+ $ stripTrailingNewline
+ $ untokenize
+ $ res
+
+-- Strip single final newline and any spaces following it.
+-- Input is unchanged if it doesn't end with newline +
+-- optional spaces.
+stripTrailingNewline :: Text -> Text
+stripTrailingNewline t =
+ let (b, e) = T.breakOnEnd "\n" t
+ in if T.all (== ' ') e
+ then T.dropEnd 1 b
+ else t
fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
fancyverbEnv name = do
@@ -2303,12 +1922,43 @@ figure = try $ do
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
- where go (Image attr alt (src,tit))
+ where go (Image attr@(_, cls, kvs) alt (src,tit))
| not ("fig:" `isPrefixOf` tit) = do
- mbcapt <- sCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
- Nothing -> Image attr alt (src,tit)
+ (mbcapt, mblab) <- sCaption <$> getState
+ let (alt', tit') = case mbcapt of
+ Just ils -> (toList ils, "fig:" ++ tit)
+ Nothing -> (alt, tit)
+ attr' = case mblab of
+ Just lab -> (lab, cls, kvs)
+ Nothing -> attr
+ case attr' of
+ ("", _, _) -> return ()
+ (ident, _, _) -> do
+ st <- getState
+ let chapnum =
+ case (sHasChapters st, sLastHeaderNum st) of
+ (True, DottedNum (n:_)) -> Just n
+ _ -> Nothing
+ let num = case sLastFigureNum st of
+ DottedNum [m,n] ->
+ case chapnum of
+ Just m' | m' == m -> DottedNum [m, n+1]
+ | otherwise -> DottedNum [m', 1]
+ Nothing -> DottedNum [1]
+ -- shouldn't happen
+ DottedNum [n] ->
+ case chapnum of
+ Just m -> DottedNum [m, 1]
+ Nothing -> DottedNum [n + 1]
+ _ ->
+ case chapnum of
+ Just n -> DottedNum [n, 1]
+ Nothing -> DottedNum [1]
+ setState $
+ st{ sLastFigureNum = num
+ , sLabels = M.insert ident
+ [Str (renderDottedNum num)] (sLabels st) }
+ return $ Image attr' alt' (src, tit')
go x = return x
coloredBlock :: PandocMonad m => String -> LP m Blocks
@@ -2321,7 +1971,8 @@ coloredBlock stylename = try $ do
graphicsPath :: PandocMonad m => LP m Blocks
graphicsPath = do
- ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ ps <- map toksToString <$>
+ (bgroup *> spaces *> manyTill (braced <* spaces) egroup)
getResourcePath >>= setResourcePath . (++ ps)
return mempty
@@ -2579,7 +2230,7 @@ simpTable envname hasWidthParameter = try $ do
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
- mbcapt <- sCaption <$> getState
+ (mbcapt, _) <- sCaption <$> getState
return $ case mbcapt of
Just ils -> Table (toList ils) als ws hs rs
Nothing -> Table c als ws hs rs
@@ -2590,7 +2241,6 @@ block :: PandocMonad m => LP m Blocks
block = do
res <- (mempty <$ spaces1)
<|> environment
- <|> include
<|> macroDef
<|> blockCommand
<|> paragraph
@@ -2613,137 +2263,3 @@ setDefaultLanguage = do
setTranslations l
updateState $ setMeta "lang" $ str (renderLang l)
return mempty
-
-polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
-polyglossiaLangToBCP47 = M.fromList
- [ ("arabic", \o -> case filter (/=' ') o of
- "locale=algeria" -> Lang "ar" "" "DZ" []
- "locale=mashriq" -> Lang "ar" "" "SY" []
- "locale=libya" -> Lang "ar" "" "LY" []
- "locale=morocco" -> Lang "ar" "" "MA" []
- "locale=mauritania" -> Lang "ar" "" "MR" []
- "locale=tunisia" -> Lang "ar" "" "TN" []
- _ -> Lang "ar" "" "" [])
- , ("german", \o -> case filter (/=' ') o of
- "spelling=old" -> Lang "de" "" "DE" ["1901"]
- "variant=austrian,spelling=old"
- -> Lang "de" "" "AT" ["1901"]
- "variant=austrian" -> Lang "de" "" "AT" []
- "variant=swiss,spelling=old"
- -> Lang "de" "" "CH" ["1901"]
- "variant=swiss" -> Lang "de" "" "CH" []
- _ -> Lang "de" "" "" [])
- , ("lsorbian", \_ -> Lang "dsb" "" "" [])
- , ("greek", \o -> case filter (/=' ') o of
- "variant=poly" -> Lang "el" "" "polyton" []
- "variant=ancient" -> Lang "grc" "" "" []
- _ -> Lang "el" "" "" [])
- , ("english", \o -> case filter (/=' ') o of
- "variant=australian" -> Lang "en" "" "AU" []
- "variant=canadian" -> Lang "en" "" "CA" []
- "variant=british" -> Lang "en" "" "GB" []
- "variant=newzealand" -> Lang "en" "" "NZ" []
- "variant=american" -> Lang "en" "" "US" []
- _ -> Lang "en" "" "" [])
- , ("usorbian", \_ -> Lang "hsb" "" "" [])
- , ("latin", \o -> case filter (/=' ') o of
- "variant=classic" -> Lang "la" "" "" ["x-classic"]
- _ -> Lang "la" "" "" [])
- , ("slovenian", \_ -> Lang "sl" "" "" [])
- , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
- , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
- , ("afrikaans", \_ -> Lang "af" "" "" [])
- , ("amharic", \_ -> Lang "am" "" "" [])
- , ("assamese", \_ -> Lang "as" "" "" [])
- , ("asturian", \_ -> Lang "ast" "" "" [])
- , ("bulgarian", \_ -> Lang "bg" "" "" [])
- , ("bengali", \_ -> Lang "bn" "" "" [])
- , ("tibetan", \_ -> Lang "bo" "" "" [])
- , ("breton", \_ -> Lang "br" "" "" [])
- , ("catalan", \_ -> Lang "ca" "" "" [])
- , ("welsh", \_ -> Lang "cy" "" "" [])
- , ("czech", \_ -> Lang "cs" "" "" [])
- , ("coptic", \_ -> Lang "cop" "" "" [])
- , ("danish", \_ -> Lang "da" "" "" [])
- , ("divehi", \_ -> Lang "dv" "" "" [])
- , ("esperanto", \_ -> Lang "eo" "" "" [])
- , ("spanish", \_ -> Lang "es" "" "" [])
- , ("estonian", \_ -> Lang "et" "" "" [])
- , ("basque", \_ -> Lang "eu" "" "" [])
- , ("farsi", \_ -> Lang "fa" "" "" [])
- , ("finnish", \_ -> Lang "fi" "" "" [])
- , ("french", \_ -> Lang "fr" "" "" [])
- , ("friulan", \_ -> Lang "fur" "" "" [])
- , ("irish", \_ -> Lang "ga" "" "" [])
- , ("scottish", \_ -> Lang "gd" "" "" [])
- , ("ethiopic", \_ -> Lang "gez" "" "" [])
- , ("galician", \_ -> Lang "gl" "" "" [])
- , ("hebrew", \_ -> Lang "he" "" "" [])
- , ("hindi", \_ -> Lang "hi" "" "" [])
- , ("croatian", \_ -> Lang "hr" "" "" [])
- , ("magyar", \_ -> Lang "hu" "" "" [])
- , ("armenian", \_ -> Lang "hy" "" "" [])
- , ("interlingua", \_ -> Lang "ia" "" "" [])
- , ("indonesian", \_ -> Lang "id" "" "" [])
- , ("icelandic", \_ -> Lang "is" "" "" [])
- , ("italian", \_ -> Lang "it" "" "" [])
- , ("japanese", \_ -> Lang "jp" "" "" [])
- , ("khmer", \_ -> Lang "km" "" "" [])
- , ("kurmanji", \_ -> Lang "kmr" "" "" [])
- , ("kannada", \_ -> Lang "kn" "" "" [])
- , ("korean", \_ -> Lang "ko" "" "" [])
- , ("lao", \_ -> Lang "lo" "" "" [])
- , ("lithuanian", \_ -> Lang "lt" "" "" [])
- , ("latvian", \_ -> Lang "lv" "" "" [])
- , ("malayalam", \_ -> Lang "ml" "" "" [])
- , ("mongolian", \_ -> Lang "mn" "" "" [])
- , ("marathi", \_ -> Lang "mr" "" "" [])
- , ("dutch", \_ -> Lang "nl" "" "" [])
- , ("nynorsk", \_ -> Lang "nn" "" "" [])
- , ("norsk", \_ -> Lang "no" "" "" [])
- , ("nko", \_ -> Lang "nqo" "" "" [])
- , ("occitan", \_ -> Lang "oc" "" "" [])
- , ("panjabi", \_ -> Lang "pa" "" "" [])
- , ("polish", \_ -> Lang "pl" "" "" [])
- , ("piedmontese", \_ -> Lang "pms" "" "" [])
- , ("portuguese", \_ -> Lang "pt" "" "" [])
- , ("romansh", \_ -> Lang "rm" "" "" [])
- , ("romanian", \_ -> Lang "ro" "" "" [])
- , ("russian", \_ -> Lang "ru" "" "" [])
- , ("sanskrit", \_ -> Lang "sa" "" "" [])
- , ("samin", \_ -> Lang "se" "" "" [])
- , ("slovak", \_ -> Lang "sk" "" "" [])
- , ("albanian", \_ -> Lang "sq" "" "" [])
- , ("serbian", \_ -> Lang "sr" "" "" [])
- , ("swedish", \_ -> Lang "sv" "" "" [])
- , ("syriac", \_ -> Lang "syr" "" "" [])
- , ("tamil", \_ -> Lang "ta" "" "" [])
- , ("telugu", \_ -> Lang "te" "" "" [])
- , ("thai", \_ -> Lang "th" "" "" [])
- , ("turkmen", \_ -> Lang "tk" "" "" [])
- , ("turkish", \_ -> Lang "tr" "" "" [])
- , ("ukrainian", \_ -> Lang "uk" "" "" [])
- , ("urdu", \_ -> Lang "ur" "" "" [])
- , ("vietnamese", \_ -> Lang "vi" "" "" [])
- ]
-
-babelLangToBCP47 :: String -> Maybe Lang
-babelLangToBCP47 s =
- case s of
- "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
- "naustrian" -> Just $ Lang "de" "" "AT" []
- "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
- "nswissgerman" -> Just $ Lang "de" "" "CH" []
- "german" -> Just $ Lang "de" "" "DE" ["1901"]
- "ngerman" -> Just $ Lang "de" "" "DE" []
- "lowersorbian" -> Just $ Lang "dsb" "" "" []
- "uppersorbian" -> Just $ Lang "hsb" "" "" []
- "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
- "slovene" -> Just $ Lang "sl" "" "" []
- "australian" -> Just $ Lang "en" "" "AU" []
- "canadian" -> Just $ Lang "en" "" "CA" []
- "british" -> Just $ Lang "en" "" "GB" []
- "newzealand" -> Just $ Lang "en" "" "NZ" []
- "american" -> Just $ Lang "en" "" "US" []
- "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
- _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
new file mode 100644
index 000000000..9b57c98fd
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -0,0 +1,173 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Lang
+ Copyright : Copyright (C) 2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing polyglossia and babel language specifiers to
+BCP47 'Lang'.
+-}
+module Text.Pandoc.Readers.LaTeX.Lang
+ ( polyglossiaLangToBCP47
+ , babelLangToBCP47
+ )
+where
+import Prelude
+import qualified Data.Map as M
+import Text.Pandoc.BCP47 (Lang(..))
+
+polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 = M.fromList
+ [ ("arabic", \o -> case filter (/=' ') o of
+ "locale=algeria" -> Lang "ar" "" "DZ" []
+ "locale=mashriq" -> Lang "ar" "" "SY" []
+ "locale=libya" -> Lang "ar" "" "LY" []
+ "locale=morocco" -> Lang "ar" "" "MA" []
+ "locale=mauritania" -> Lang "ar" "" "MR" []
+ "locale=tunisia" -> Lang "ar" "" "TN" []
+ _ -> Lang "ar" "" "" [])
+ , ("german", \o -> case filter (/=' ') o of
+ "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "variant=austrian,spelling=old"
+ -> Lang "de" "" "AT" ["1901"]
+ "variant=austrian" -> Lang "de" "" "AT" []
+ "variant=swiss,spelling=old"
+ -> Lang "de" "" "CH" ["1901"]
+ "variant=swiss" -> Lang "de" "" "CH" []
+ _ -> Lang "de" "" "" [])
+ , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ , ("greek", \o -> case filter (/=' ') o of
+ "variant=poly" -> Lang "el" "" "polyton" []
+ "variant=ancient" -> Lang "grc" "" "" []
+ _ -> Lang "el" "" "" [])
+ , ("english", \o -> case filter (/=' ') o of
+ "variant=australian" -> Lang "en" "" "AU" []
+ "variant=canadian" -> Lang "en" "" "CA" []
+ "variant=british" -> Lang "en" "" "GB" []
+ "variant=newzealand" -> Lang "en" "" "NZ" []
+ "variant=american" -> Lang "en" "" "US" []
+ _ -> Lang "en" "" "" [])
+ , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ , ("latin", \o -> case filter (/=' ') o of
+ "variant=classic" -> Lang "la" "" "" ["x-classic"]
+ _ -> Lang "la" "" "" [])
+ , ("slovenian", \_ -> Lang "sl" "" "" [])
+ , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
+ , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
+ , ("afrikaans", \_ -> Lang "af" "" "" [])
+ , ("amharic", \_ -> Lang "am" "" "" [])
+ , ("assamese", \_ -> Lang "as" "" "" [])
+ , ("asturian", \_ -> Lang "ast" "" "" [])
+ , ("bulgarian", \_ -> Lang "bg" "" "" [])
+ , ("bengali", \_ -> Lang "bn" "" "" [])
+ , ("tibetan", \_ -> Lang "bo" "" "" [])
+ , ("breton", \_ -> Lang "br" "" "" [])
+ , ("catalan", \_ -> Lang "ca" "" "" [])
+ , ("welsh", \_ -> Lang "cy" "" "" [])
+ , ("czech", \_ -> Lang "cs" "" "" [])
+ , ("coptic", \_ -> Lang "cop" "" "" [])
+ , ("danish", \_ -> Lang "da" "" "" [])
+ , ("divehi", \_ -> Lang "dv" "" "" [])
+ , ("esperanto", \_ -> Lang "eo" "" "" [])
+ , ("spanish", \_ -> Lang "es" "" "" [])
+ , ("estonian", \_ -> Lang "et" "" "" [])
+ , ("basque", \_ -> Lang "eu" "" "" [])
+ , ("farsi", \_ -> Lang "fa" "" "" [])
+ , ("finnish", \_ -> Lang "fi" "" "" [])
+ , ("french", \_ -> Lang "fr" "" "" [])
+ , ("friulan", \_ -> Lang "fur" "" "" [])
+ , ("irish", \_ -> Lang "ga" "" "" [])
+ , ("scottish", \_ -> Lang "gd" "" "" [])
+ , ("ethiopic", \_ -> Lang "gez" "" "" [])
+ , ("galician", \_ -> Lang "gl" "" "" [])
+ , ("hebrew", \_ -> Lang "he" "" "" [])
+ , ("hindi", \_ -> Lang "hi" "" "" [])
+ , ("croatian", \_ -> Lang "hr" "" "" [])
+ , ("magyar", \_ -> Lang "hu" "" "" [])
+ , ("armenian", \_ -> Lang "hy" "" "" [])
+ , ("interlingua", \_ -> Lang "ia" "" "" [])
+ , ("indonesian", \_ -> Lang "id" "" "" [])
+ , ("icelandic", \_ -> Lang "is" "" "" [])
+ , ("italian", \_ -> Lang "it" "" "" [])
+ , ("japanese", \_ -> Lang "jp" "" "" [])
+ , ("khmer", \_ -> Lang "km" "" "" [])
+ , ("kurmanji", \_ -> Lang "kmr" "" "" [])
+ , ("kannada", \_ -> Lang "kn" "" "" [])
+ , ("korean", \_ -> Lang "ko" "" "" [])
+ , ("lao", \_ -> Lang "lo" "" "" [])
+ , ("lithuanian", \_ -> Lang "lt" "" "" [])
+ , ("latvian", \_ -> Lang "lv" "" "" [])
+ , ("malayalam", \_ -> Lang "ml" "" "" [])
+ , ("mongolian", \_ -> Lang "mn" "" "" [])
+ , ("marathi", \_ -> Lang "mr" "" "" [])
+ , ("dutch", \_ -> Lang "nl" "" "" [])
+ , ("nynorsk", \_ -> Lang "nn" "" "" [])
+ , ("norsk", \_ -> Lang "no" "" "" [])
+ , ("nko", \_ -> Lang "nqo" "" "" [])
+ , ("occitan", \_ -> Lang "oc" "" "" [])
+ , ("panjabi", \_ -> Lang "pa" "" "" [])
+ , ("polish", \_ -> Lang "pl" "" "" [])
+ , ("piedmontese", \_ -> Lang "pms" "" "" [])
+ , ("portuguese", \_ -> Lang "pt" "" "" [])
+ , ("romansh", \_ -> Lang "rm" "" "" [])
+ , ("romanian", \_ -> Lang "ro" "" "" [])
+ , ("russian", \_ -> Lang "ru" "" "" [])
+ , ("sanskrit", \_ -> Lang "sa" "" "" [])
+ , ("samin", \_ -> Lang "se" "" "" [])
+ , ("slovak", \_ -> Lang "sk" "" "" [])
+ , ("albanian", \_ -> Lang "sq" "" "" [])
+ , ("serbian", \_ -> Lang "sr" "" "" [])
+ , ("swedish", \_ -> Lang "sv" "" "" [])
+ , ("syriac", \_ -> Lang "syr" "" "" [])
+ , ("tamil", \_ -> Lang "ta" "" "" [])
+ , ("telugu", \_ -> Lang "te" "" "" [])
+ , ("thai", \_ -> Lang "th" "" "" [])
+ , ("turkmen", \_ -> Lang "tk" "" "" [])
+ , ("turkish", \_ -> Lang "tr" "" "" [])
+ , ("ukrainian", \_ -> Lang "uk" "" "" [])
+ , ("urdu", \_ -> Lang "ur" "" "" [])
+ , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ ]
+
+babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 s =
+ case s of
+ "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
+ "naustrian" -> Just $ Lang "de" "" "AT" []
+ "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
+ "nswissgerman" -> Just $ Lang "de" "" "CH" []
+ "german" -> Just $ Lang "de" "" "DE" ["1901"]
+ "ngerman" -> Just $ Lang "de" "" "DE" []
+ "lowersorbian" -> Just $ Lang "dsb" "" "" []
+ "uppersorbian" -> Just $ Lang "hsb" "" "" []
+ "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
+ "slovene" -> Just $ Lang "sl" "" "" []
+ "australian" -> Just $ Lang "en" "" "AU" []
+ "canadian" -> Just $ Lang "en" "" "CA" []
+ "british" -> Just $ Lang "en" "" "GB" []
+ "newzealand" -> Just $ Lang "en" "" "NZ" []
+ "american" -> Just $ Lang "en" "" "US" []
+ "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
new file mode 100644
index 000000000..9256217fe
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -0,0 +1,668 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Parsing
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+General parsing types and functions for LaTeX.
+-}
+module Text.Pandoc.Readers.LaTeX.Parsing
+ ( DottedNum(..)
+ , renderDottedNum
+ , incrementDottedNum
+ , LaTeXState(..)
+ , defaultLaTeXState
+ , LP
+ , withVerbatimMode
+ , rawLaTeXParser
+ , applyMacros
+ , tokenize
+ , untokenize
+ , untoken
+ , totoks
+ , toksToString
+ , satisfyTok
+ , doMacros
+ , setpos
+ , anyControlSeq
+ , anySymbol
+ , isNewlineTok
+ , isWordTok
+ , isArgTok
+ , spaces
+ , spaces1
+ , tokTypeIn
+ , controlSeq
+ , symbol
+ , symbolIn
+ , sp
+ , whitespace
+ , newlineTok
+ , comment
+ , anyTok
+ , singleChar
+ , specialChars
+ , endline
+ , blankline
+ , primEscape
+ , bgroup
+ , egroup
+ , grouped
+ , braced
+ , braced'
+ , bracedUrl
+ , bracedOrToken
+ , bracketed
+ , bracketedToks
+ , parenWrapped
+ , dimenarg
+ , ignore
+ , withRaw
+ ) where
+
+import Prelude
+import Control.Applicative (many, (<|>))
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Control.Monad.Trans (lift)
+import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
+import Data.Default
+import Data.List (intercalate)
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
+ ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Shared
+import Text.Parsec.Pos
+
+newtype DottedNum = DottedNum [Int]
+ deriving (Show)
+
+renderDottedNum :: DottedNum -> String
+renderDottedNum (DottedNum xs) =
+ intercalate "." (map show xs)
+
+incrementDottedNum :: Int -> DottedNum -> DottedNum
+incrementDottedNum level (DottedNum ns) = DottedNum $
+ case reverse (take level (ns ++ repeat 0)) of
+ (x:xs) -> reverse (x+1 : xs)
+ [] -> [] -- shouldn't happen
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: (Maybe Inlines, Maybe String)
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ , sLastHeaderNum :: DottedNum
+ , sLastFigureNum :: DottedNum
+ , sLabels :: M.Map String [Inline]
+ , sHasChapters :: Bool
+ , sToggles :: M.Map String Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = (Nothing, Nothing)
+ , sInListItem = False
+ , sInTableCell = False
+ , sLastHeaderNum = DottedNum []
+ , sLastFigureNum = DottedNum []
+ , sLabels = M.empty
+ , sHasChapters = False
+ , sToggles = M.empty
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
+rawLaTeXParser retokenize parser valParser = do
+ inp <- getInput
+ let toks = tokenize "source" $ T.pack inp
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ let lstate' = lstate { sMacros = extractMacros pstate }
+ let rawparser = (,) <$> withRaw valParser <*> getState
+ res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ case res' of
+ Left _ -> mzero
+ Right toks' -> do
+ res <- lift $ runParserT (do when retokenize $ do
+ -- retokenize, applying macros
+ doMacros 0
+ ts <- many (satisfyTok (const True))
+ setInput ts
+ rawparser)
+ lstate' "chunk" toks'
+ case res of
+ Left _ -> mzero
+ Right ((val, raw), st) -> do
+ updateState (updateMacros (sMacros st <>))
+ _ <- takeP (T.length (untokenize toks'))
+ return (val, T.unpack (untokenize raw))
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
+ do let retokenize = doMacros 0 *>
+ (toksToString <$> many (satisfyTok (const True)))
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s'
+tokenize :: SourceName -> Text -> [Tok]
+tokenize sourcename = totoks (initialPos sourcename)
+
+totoks :: SourcePos -> Text -> [Tok]
+totoks pos t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok pos Newline "\n"
+ : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok pos Spaces sps
+ : totoks (incSourceColumn pos (T.length sps))
+ rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok pos Word ws
+ : totoks (incSourceColumn pos (T.length ws)) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok pos Comment ("%" <> cs)
+ : totoks (incSourceColumn pos (1 + T.length cs)) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok pos (CtrlSeq " ") "\\"]
+ Just (d, rest')
+ | isLetterOrAt d ->
+ -- \makeatletter is common in macro defs;
+ -- ideally we should make tokenization sensitive
+ -- to \makeatletter and \makeatother, but this is
+ -- probably best for now
+ let (ws, rest'') = T.span isLetterOrAt rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (incSourceColumn pos
+ (1 + T.length ws + T.length ss)) rest'''
+ | isSpaceOrTab d || d == '\n' ->
+ let (w1, r1) = T.span isSpaceOrTab rest
+ (w2, (w3, r3)) = case T.uncons r1 of
+ Just ('\n', r2)
+ -> (T.pack "\n",
+ T.span isSpaceOrTab r2)
+ _ -> (mempty, (mempty, r1))
+ ws = "\\" <> w1 <> w2 <> w3
+ in case T.uncons r3 of
+ Just ('\n', _) ->
+ Tok pos (CtrlSeq " ") ("\\" <> w1)
+ : totoks (incSourceColumn pos (T.length ws))
+ r1
+ _ ->
+ Tok pos (CtrlSeq " ") ws
+ : totoks (incSourceColumn pos (T.length ws))
+ r3
+ | otherwise ->
+ Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (incSourceColumn pos 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok pos (Arg i) ("#" <> t1)
+ : totoks (incSourceColumn pos (1 + T.length t1)) t2
+ Nothing ->
+ Tok pos Symbol "#"
+ : totoks (incSourceColumn pos 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok pos Esc2 (T.pack ['^','^',d,e])
+ : totoks (incSourceColumn pos 4) rest'''
+ _ ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ | d < '\128' ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ _ -> Tok pos Symbol "^" :
+ Tok (incSourceColumn pos 1) Symbol "^" :
+ totoks (incSourceColumn pos 2) rest'
+ _ -> Tok pos Symbol "^"
+ : totoks (incSourceColumn pos 1) rest
+ | otherwise ->
+ Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
+
+isSpaceOrTab :: Char -> Bool
+isSpaceOrTab ' ' = True
+isSpaceOrTab '\t' = True
+isSpaceOrTab _ = False
+
+isLetterOrAt :: Char -> Bool
+isLetterOrAt '@' = True
+isLetterOrAt c = isLetter c
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos _spos _ (Tok pos _ _ : _) = pos
+ updatePos spos _ [] = incSourceColumn spos 1
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ unless verbatimMode $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok _ (CtrlSeq "expandafter") _ : t : ts
+ -> do setInput ts
+ doMacros n
+ getInput >>= setInput . combineTok t
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
+ | T.all isLetterOrAt w =
+ Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
+ where (x1, x2) = T.break isSpaceOrTab x
+ combineTok t ts = t:ts
+ handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro expansionPoint argspecs optarg newtoks) -> do
+ setInput ts
+ let matchTok (Tok _ toktype txt) =
+ satisfyTok (\(Tok _ toktype' txt') ->
+ toktype == toktype' &&
+ txt == txt')
+ let matchPattern toks = try $ mapM_ matchTok toks
+ let getargs argmap [] = return argmap
+ getargs argmap (Pattern toks : rest) = try $ do
+ matchPattern toks
+ getargs argmap rest
+ getargs argmap (ArgNum i : Pattern toks : rest) =
+ try $ do
+ x <- mconcat <$> manyTill
+ (braced <|> ((:[]) <$> anyTok))
+ (matchPattern toks)
+ getargs (M.insert i x argmap) rest
+ getargs argmap (ArgNum i : rest) = do
+ x <- try $ spaces >> bracedOrToken
+ getargs (M.insert i x argmap) rest
+ args <- case optarg of
+ Nothing -> getargs M.empty argspecs
+ Just o -> do
+ x <- option o bracketedToks
+ getargs (M.singleton 1 x) argspecs
+ -- first boolean param is true if we're tokenizing
+ -- an argument (in which case we don't want to
+ -- expand #1 etc.)
+ let addTok False (Tok _ (Arg i) _) acc =
+ case M.lookup i args of
+ Nothing -> mzero
+ Just xs -> foldr (addTok True) acc xs
+ -- see #4007
+ addTok _ (Tok _ (CtrlSeq x) txt)
+ acc@(Tok _ Word _ : _)
+ | not (T.null txt) &&
+ isLetter (T.last txt) =
+ Tok spos (CtrlSeq x) (txt <> " ") : acc
+ addTok _ t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr (addTok False) ts' newtoks
+ case expansionPoint of
+ ExpandWhenUsed ->
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+ ExpandWhenDefined -> return ()
+
+
+setpos :: SourcePos -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+
+isCtrlSeq :: Tok -> Bool
+isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSymbolTok
+
+isSymbolTok :: Tok -> Bool
+isSymbolTok (Tok _ Symbol _) = True
+isSymbolTok _ = False
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
+
+isArgTok :: Tok -> Bool
+isArgTok (Tok _ (Arg _) _) = True
+isArgTok _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
+
+sp :: PandocMonad m => LP m ()
+sp = whitespace <|> endline
+
+whitespace :: PandocMonad m => LP m ()
+whitespace = () <$ satisfyTok isSpaceTok
+
+isSpaceTok :: Tok -> Bool
+isSpaceTok (Tok _ Spaces _) = True
+isSpaceTok _ = False
+
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
+
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
+
+comment :: PandocMonad m => LP m ()
+comment = () <$ satisfyTok isCommentTok
+
+isCommentTok :: Tok -> Bool
+isCommentTok (Tok _ Comment _) = True
+isCommentTok _ = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
+
+singleChar :: PandocMonad m => LP m Tok
+singleChar = try $ do
+ Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
+ return $ Tok pos toktype t1
+ else return $ Tok pos toktype t
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
+bgroup = try $ do
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+
+egroup :: PandocMonad m => LP m Tok
+egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
+grouped parser = try $ do
+ bgroup
+ -- first we check for an inner 'grouped', because
+ -- {{a,b}} should be parsed the same as {a,b}
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
+braced' getTok n =
+ handleEgroup <|> handleBgroup <|> handleOther
+ where handleEgroup = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' getTok (n - 1)
+ handleBgroup = do
+ t <- bgroup
+ (t:) <$> braced' getTok (n + 1)
+ handleOther = do
+ t <- getTok
+ (t:) <$> braced' getTok n
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' anyTok 1
+
+-- URLs require special handling, because they can contain %
+-- characters. So we retonenize comments as we go...
+bracedUrl :: PandocMonad m => LP m [Tok]
+bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+
+-- For handling URLs, which allow literal % characters...
+retokenizeComment :: PandocMonad m => LP m ()
+retokenizeComment = (do
+ Tok pos Comment txt <- satisfyTok isCommentTok
+ let updPos (Tok pos' toktype' txt') =
+ Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
+ (sourceColumn pos)) toktype' txt'
+ let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
+ getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
+ <|> return ()
+
+bracedOrToken :: PandocMonad m => LP m [Tok]
+bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+
+bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+
+parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a
+parenWrapped parser = try $ do
+ symbol '('
+ mconcat <$> manyTill parser (symbol ')')
+
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ T.take 2 (T.reverse s) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
+
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index fa832114b..e3a302d49 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -31,6 +31,7 @@ Types for LaTeX tokens and macros.
module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
, TokType(..)
, Macro(..)
+ , ArgSpec(..)
, ExpansionPoint(..)
, SourcePos
)
@@ -49,5 +50,8 @@ data Tok = Tok SourcePos TokType Text
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
-data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok]
+data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+ deriving Show
+
+data ArgSpec = ArgNum Int | Pattern [Tok]
deriving Show
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 156b2b622..d1ea7a1a5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
@@ -31,31 +32,28 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
+import qualified Data.ByteString.Lazy as BS
import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import qualified Data.HashMap.Strict as H
import Data.List (intercalate, sortBy, transpose, elemIndex)
import qualified Data.Map as M
import Data.Maybe
import Data.Ord (comparing)
-import Data.Scientific (base10Exponent, coefficient)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.Vector as V
-import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..), report)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
@@ -234,11 +232,9 @@ pandocTitleBlock = try $ do
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
- pos <- getPosition
string "---"
blankline
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
@@ -246,52 +242,44 @@ yamlMetaBlock = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> do
- let alist = H.toList hashmap
- mapM_ (\(k, v) ->
- if ignorable k
- then return ()
- else do
- v' <- yamlToMeta v
- let k' = T.unpack k
- updateState $ \st -> st{ stateMeta' =
- do m <- stateMeta' st
- -- if there's already a value, leave it unchanged
- case lookupMeta k' m of
- Just _ -> return m
- Nothing -> do
- v'' <- v'
- return $ B.setMeta (T.unpack k) v'' m}
- ) alist
- Right Yaml.Null -> return ()
+ newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml
+ -- Since `<>` is left-biased, existing values are not touched:
+ updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
+ return mempty
+
+-- | Read a YAML string and convert it to pandoc metadata.
+-- String scalars in the YAML are parsed as Markdown.
+yamlToMeta :: PandocMonad m => BS.ByteString -> m Meta
+yamlToMeta bstr = do
+ let parser = do
+ meta <- yamlBsToMeta bstr
+ return $ runF meta defaultParserState
+ parsed <- readWithM parser def ""
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta)
+yamlBsToMeta bstr = do
+ pos <- getPosition
+ case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
+ Right ((YAML.Doc (YAML.Mapping _ o)):_) -> (fmap Meta) <$> yamlMap o
+ Right [] -> return . return $ mempty
+ Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return . return $ mempty
Right _ -> do
- logMessage $
- CouldNotParseYamlMetadata "not an object"
- pos
- return ()
+ logMessage $
+ CouldNotParseYamlMetadata "not an object"
+ pos
+ return . return $ mempty
Left err' -> do
- case err' of
- InvalidYaml (Just YamlParseException{
- yamlProblem = problem
- , yamlContext = _ctxt
- , yamlProblemMark = Yaml.YamlMark {
- yamlLine = yline
- , yamlColumn = ycol
- }}) ->
- logMessage $ CouldNotParseYamlMetadata
- problem (setSourceLine
- (setSourceColumn pos
- (sourceColumn pos + ycol))
- (sourceLine pos + 1 + yline))
- _ -> logMessage $ CouldNotParseYamlMetadata
- (show err') pos
- return ()
- return mempty
+ logMessage $ CouldNotParseYamlMetadata
+ err' pos
+ return . return $ mempty
--- ignore fields ending with _
-ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
+nodeToKey :: Monad m => YAML.Node -> m Text
+nodeToKey (YAML.Scalar (YAML.SStr t)) = return t
+nodeToKey (YAML.Scalar (YAML.SUnknown _ t)) = return t
+nodeToKey _ = fail "Non-string key in YAML mapping"
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
@@ -312,34 +300,51 @@ toMetaValue x =
-- not end in a newline, but a "block" set off with
-- `|` or `>` will.
-yamlToMeta :: PandocMonad m
- => Yaml.Value -> MarkdownParser m (F MetaValue)
-yamlToMeta (Yaml.String t) = toMetaValue t
-yamlToMeta (Yaml.Number n)
- -- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = return $ return $ MetaString $ show
- $ coefficient n * (10 ^ base10Exponent n)
- | otherwise = return $ return $ MetaString $ show n
-yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
-yamlToMeta (Yaml.Array xs) = do
- xs' <- mapM yamlToMeta (V.toList xs)
+checkBoolean :: Text -> Maybe Bool
+checkBoolean t =
+ if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
+ then Just True
+ else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
+ then Just False
+ else Nothing
+
+yamlToMetaValue :: PandocMonad m
+ => YAML.Node -> MarkdownParser m (F MetaValue)
+yamlToMetaValue (YAML.Scalar x) =
+ case x of
+ YAML.SStr t -> toMetaValue t
+ YAML.SBool b -> return $ return $ MetaBool b
+ YAML.SFloat d -> return $ return $ MetaString (show d)
+ YAML.SInt i -> return $ return $ MetaString (show i)
+ YAML.SUnknown _ t ->
+ case checkBoolean t of
+ Just b -> return $ return $ MetaBool b
+ Nothing -> toMetaValue t
+ YAML.SNull -> return $ return $ MetaString ""
+yamlToMetaValue (YAML.Sequence _ xs) = do
+ xs' <- mapM yamlToMetaValue xs
return $ do
xs'' <- sequence xs'
return $ B.toMetaValue xs''
-yamlToMeta (Yaml.Object o) = do
- let alist = H.toList o
- foldM (\m (k,v) ->
- if ignorable k
- then return m
- else do
- v' <- yamlToMeta v
- return $ do
- MetaMap m' <- m
- v'' <- v'
- return (MetaMap $ M.insert (T.unpack k) v'' m'))
- (return $ MetaMap M.empty)
- alist
-yamlToMeta _ = return $ return $ MetaString ""
+yamlToMetaValue (YAML.Mapping _ o) = fmap B.toMetaValue <$> yamlMap o
+yamlToMetaValue _ = return $ return $ MetaString ""
+
+yamlMap :: PandocMonad m
+ => M.Map YAML.Node YAML.Node
+ -> MarkdownParser m (F (M.Map String MetaValue))
+yamlMap o = do
+ kvs <- forM (M.toList o) $ \(key, v) -> do
+ k <- nodeToKey key
+ return (k, v)
+ let kvs' = filter (not . ignorable . fst) kvs
+ (fmap M.fromList . sequence) <$> mapM toMeta kvs'
+ where
+ ignorable t = (T.pack "_") `T.isSuffixOf` t
+ toMeta (k, v) = do
+ fv <- yamlToMetaValue v
+ return $ do
+ v' <- fv
+ return (T.unpack k, v')
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -966,7 +971,9 @@ orderedList = try $ do
<|> return (style == Example)
items <- fmap sequence $ many1 $ listItem fourSpaceRule
(orderedListStart (Just (style, delim)))
- start' <- (start <$ guardEnabled Ext_startnum) <|> return 1
+ start' <- if style == Example
+ then return start
+ else (start <$ guardEnabled Ext_startnum) <|> return 1
return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
@@ -1142,10 +1149,9 @@ rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
lookAhead $ try $ char '\\' >> letter
- result <- (B.rawBlock "context" . trim . concat <$>
- many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand)
- <*> spnl'))
- <|> (B.rawBlock "latex" . trim . concat <$>
+ result <- (B.rawBlock "tex" . trim . concat <$>
+ many1 ((++) <$> rawConTeXtEnvironment <*> spnl'))
+ <|> (B.rawBlock "tex" . trim . concat <$>
many1 ((++) <$> rawLaTeXBlock <*> spnl'))
return $ case B.toList result of
[RawBlock _ cs]
@@ -1153,9 +1159,6 @@ rawTeXBlock = do
-- don't create a raw block for suppressed macro defs
_ -> return result
-conTeXtCommand :: PandocMonad m => MarkdownParser m String
-conTeXtCommand = oneOfStrings ["\\placeformula"]
-
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
@@ -1591,7 +1594,7 @@ code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- (trim . concat) <$>
- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ manyTill (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
@@ -1877,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
- (orig, src) <- uri <|> emailAddress
+ (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
- return $ return $ B.link src "" (B.str orig)
+ return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
- (orig, src) <- uri <|> emailAddress
+ (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-- in rare cases, something may remain after the uri parser
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
- attr <- option nullAttr $ try $
+ attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
- return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+ return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
+ (B.str $ orig ++ extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
@@ -2037,9 +2041,9 @@ emoji = try $ do
char ':'
emojikey <- many1 (oneOf emojiChars)
char ':'
- case M.lookup emojikey emojis of
- Just s -> return (return (B.str s))
- Nothing -> mzero
+ case emojiToInline emojikey of
+ Just i -> return (return $ B.singleton i)
+ Nothing -> mzero
-- Citations
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index fe6b3698c..134598c07 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -35,34 +35,32 @@ TODO:
- Page breaks (five "*")
- Org tables
- table.el tables
-- Images with attributes (floating and width)
- <cite> tag
-}
module Text.Pandoc.Readers.Muse (readMuse) where
import Prelude
import Control.Monad
+import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
-import Data.Char (isLetter)
+import Data.Char (isAlphaNum)
import Data.Default
-import Data.List (stripPrefix, intercalate)
+import Data.List (intercalate)
import Data.List.Split (splitOn)
import qualified Data.Map as M
import qualified Data.Set as Set
-import Data.Maybe (fromMaybe, isNothing)
+import Data.Maybe (fromMaybe, isNothing, maybeToList)
import Data.Text (Text, unpack)
-import System.FilePath (takeExtension)
-import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Readers.HTML (htmlTag)
-import Text.Pandoc.Shared (crFilter, underlineSpan)
+import Text.Pandoc.Parsing hiding (F, enclosed)
+import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -70,7 +68,8 @@ readMuse :: PandocMonad m
-> Text
-> m Pandoc
readMuse opts s = do
- res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s))
+ let input = crFilter s
+ res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def
case res of
Left e -> throwError e
Right d -> return d
@@ -84,7 +83,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
, museNotes :: M.Map String (SourcePos, F Blocks)
- , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
, museInPara :: Bool -- ^ True when looking for a paragraph terminator
}
@@ -96,11 +94,17 @@ instance Default MuseState where
, museLastStrPos = Nothing
, museLogMessages = []
, museNotes = M.empty
- , museInLink = False
, museInPara = False
}
-type MuseParser = ParserT String MuseState
+data MuseEnv =
+ MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+ }
+
+instance Default MuseEnv where
+ def = MuseEnv { museInLink = False }
+
+type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@@ -125,11 +129,9 @@ instance HasLogMessages MuseState where
parseMuse :: PandocMonad m => MuseParser m Pandoc
parseMuse = do
many directive
- blocks <- parseBlocks
+ blocks <- (:) <$> parseBlocks <*> many parseSection
st <- getState
- let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
- meta <- museMeta st
- return $ Pandoc meta bs) st
+ let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st
reportLogMessages
return doc
@@ -144,9 +146,8 @@ commonPrefix (x:xs) (y:ys)
-- | Trim up to one newline from the beginning of the string.
lchop :: String -> String
-lchop s = case s of
- '\n':ss -> ss
- _ -> s
+lchop ('\n':xs) = xs
+lchop s = s
-- | Trim up to one newline from the end of the string.
rchop :: String -> String
@@ -165,12 +166,19 @@ atStart p = do
guard $ museLastStrPos st /= Just pos
p
+firstColumn :: PandocMonad m => MuseParser m ()
+firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol = void newline <|> eof
+getIndent :: PandocMonad m
+ => MuseParser m Int
+getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
+
someUntil :: (Stream s m t)
=> ParserT s u m a
-> ParserT s u m b
@@ -179,28 +187,21 @@ someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
-- ** HTML parsers
--- | Parse HTML tag, returning its attributes and literal contents.
-htmlElement :: PandocMonad m
- => String -- ^ Tag name
- -> MuseParser m (Attr, String)
-htmlElement tag = try $ do
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar endtag
- return (htmlAttrToPandoc attr, content)
+openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag tag = try $
+ char '<' *> string tag *> manyTill attr (char '>')
where
- endtag = void $ htmlTag (~== TagClose tag)
+ attr = try $ (,)
+ <$ many1 spaceChar
+ <*> many1 (noneOf "=\n")
+ <* string "=\""
+ <*> manyTill (noneOf "\"") (char '"')
-htmlBlock :: PandocMonad m
- => String -- ^ Tag name
- -> MuseParser m (Attr, String)
-htmlBlock tag = try $ do
- many spaceChar
- res <- htmlElement tag
- manyTill spaceChar eol
- return res
+closeTag :: PandocMonad m => String -> MuseParser m ()
+closeTag tag = try $ string "</" *> string tag *> void (char '>')
-- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc :: [(String, String)] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
@@ -211,15 +212,12 @@ parseHtmlContent :: PandocMonad m
=> String -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ do
- many spaceChar
- pos <- getPosition
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ indent <- getIndent
+ attr <- openTag tag
manyTill spaceChar eol
- content <- parseBlocksTill $ try $ count (sourceColumn pos - 1) spaceChar >> endtag
+ content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag
manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
return (htmlAttrToPandoc attr, content)
- where
- endtag = void $ htmlTag (~== TagClose tag)
-- ** Directive parsers
@@ -228,21 +226,19 @@ parseDirectiveKey :: PandocMonad m => MuseParser m String
parseDirectiveKey = char '#' *> many (letter <|> char '-')
parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseEmacsDirective = do
- key <- parseDirectiveKey
- spaceChar
- value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol
- return (key, value)
+parseEmacsDirective = (,)
+ <$> parseDirectiveKey
+ <* spaceChar
+ <*> (trimInlinesF . mconcat <$> manyTill inline' eol)
parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseAmuseDirective = do
- key <- parseDirectiveKey
- many1 spaceChar
- value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective
- many blankline
- return (key, value)
+parseAmuseDirective = (,)
+ <$> parseDirectiveKey
+ <* many1 spaceChar
+ <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective)
+ <* many blankline
where
- endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey))
+ endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey))
directive :: PandocMonad m => MuseParser m ()
directive = do
@@ -254,17 +250,20 @@ directive = do
-- ** Block parsers
+-- | Parse section contents until EOF or next header
parseBlocks :: PandocMonad m
=> MuseParser m (F Blocks)
parseBlocks =
try (parseEnd <|>
+ nextSection <|>
blockStart <|>
listStart <|>
paraStart)
where
+ nextSection = mempty <$ lookAhead headingStart
parseEnd = mempty <$ eof
- blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock)
- <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+ blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock)
+ <*> parseBlocks
listStart = do
updateState (\st -> st { museInPara = False })
uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
@@ -273,6 +272,13 @@ parseBlocks =
uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks
where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id
+-- | Parse section that starts with a header
+parseSection :: PandocMonad m
+ => MuseParser m (F Blocks)
+parseSection =
+ ((B.<>) <$> emacsHeading <*> parseBlocks) <|>
+ (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+
parseBlocksTill :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks)
@@ -347,31 +353,32 @@ blockElements = do
-- | Parse a line comment, starting with @;@ in the first column.
comment :: PandocMonad m => MuseParser m (F Blocks)
-comment = try $ do
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- char ';'
- optional (spaceChar >> many (noneOf "\n"))
- eol
- return mempty
+comment = try $ mempty
+ <$ firstColumn
+ <* char ';'
+ <* optional (spaceChar *> many (noneOf "\n"))
+ <* eol
-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
separator :: PandocMonad m => MuseParser m (F Blocks)
-separator = try $ do
- string "----"
- many $ char '-'
- many spaceChar
- eol
- return $ return B.horizontalRule
+separator = try $ pure B.horizontalRule
+ <$ string "----"
+ <* many (char '-')
+ <* many spaceChar
+ <* eol
+
+headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart = try $ (,)
+ <$> option "" (try (parseAnchor <* manyTill spaceChar eol))
+ <* firstColumn
+ <*> fmap length (many1 $ char '*')
+ <* spaceChar
-- | Parse a single-line heading.
emacsHeading :: PandocMonad m => MuseParser m (F Blocks)
emacsHeading = try $ do
guardDisabled Ext_amuse
- anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- level <- fmap length $ many1 $ char '*'
- guard $ level <= 5
- spaceChar
+ (anchorId, level) <- headingStart
content <- trimInlinesF . mconcat <$> manyTill inline eol
attr <- registerHeader (anchorId, [], []) (runF content def)
return $ B.headerWith attr level <$> content
@@ -383,11 +390,7 @@ amuseHeadingUntil :: PandocMonad m
-> MuseParser m (F Blocks, a)
amuseHeadingUntil end = try $ do
guardEnabled Ext_amuse
- anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- level <- fmap length $ many1 $ char '*'
- guard $ level <= 5
- spaceChar
+ (anchorId, level) <- headingStart
(content, e) <- paraContentsUntil end
attr <- registerHeader (anchorId, [], []) (runF content def)
return (B.headerWith attr level <$> content, e)
@@ -395,33 +398,28 @@ amuseHeadingUntil end = try $ do
-- | Parse an example between @{{{@ and @}}}@.
-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
example :: PandocMonad m => MuseParser m (F Blocks)
-example = try $ do
- string "{{{"
- optional blankline
- contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
- return $ return $ B.codeBlock contents
+example = try $ pure . B.codeBlock
+ <$ string "{{{"
+ <* optional blankline
+ <*> manyTill anyChar (try (optional blankline *> string "}}}"))
-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
-exampleTag = try $ do
- (attr, contents) <- htmlBlock "example"
- return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
+exampleTag = try $ fmap pure $ B.codeBlockWith
+ <$ many spaceChar
+ <*> (htmlAttrToPandoc <$> openTag "example")
+ <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "example"))
+ <* manyTill spaceChar eol
-- | Parse a @\<literal>@ tag as a raw block.
-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
literalTag :: PandocMonad m => MuseParser m (F Blocks)
-literalTag = try $ do
- many spaceChar
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen "literal" [])
- manyTill spaceChar eol
- content <- manyTill anyChar endtag
- manyTill spaceChar eol
- return $ return $ rawBlock (htmlAttrToPandoc attr, content)
- where
- endtag = void $ htmlTag (~== TagClose "literal")
- -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
- rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
+literalTag = try $ fmap pure $ B.rawBlock
+ <$ many spaceChar
+ <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ <* manyTill spaceChar eol
+ <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "literal"))
+ <* manyTill spaceChar eol
-- | Parse @\<center>@ tag.
-- Currently it is ignored as Pandoc cannot represent centered blocks.
@@ -459,25 +457,27 @@ playTag = do
fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play"
verseLine :: PandocMonad m => MuseParser m (F Inlines)
-verseLine = do
- indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty
- rest <- manyTill (choice inlineList) newline
- return $ trimInlinesF $ mconcat (pure indent : rest)
-
-verseLines :: PandocMonad m => MuseParser m (F Blocks)
-verseLines = do
- lns <- many verseLine
- return $ B.lineBlock <$> sequence lns
+verseLine = (<>)
+ <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+ <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
-- | Parse @\<verse>@ tag.
verseTag :: PandocMonad m => MuseParser m (F Blocks)
-verseTag = do
- (_, content) <- htmlBlock "verse"
- parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
+verseTag = try $ do
+ indent <- getIndent
+ openTag "verse"
+ manyTill spaceChar eol
+ content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse")
+ manyTill spaceChar eol
+ return $ B.lineBlock <$> content
-- | Parse @\<comment>@ tag.
commentTag :: PandocMonad m => MuseParser m (F Blocks)
-commentTag = htmlBlock "comment" >> return mempty
+commentTag = try $ mempty
+ <$ many spaceChar
+ <* openTag "comment"
+ <* manyTill anyChar (closeTag "comment")
+ <* manyTill spaceChar eol
-- | Parse paragraph contents.
paraContentsUntil :: PandocMonad m
@@ -485,7 +485,7 @@ paraContentsUntil :: PandocMonad m
-> MuseParser m (F Inlines, a)
paraContentsUntil end = do
updateState (\st -> st { museInPara = True })
- (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
+ (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end)
updateState (\st -> st { museInPara = False })
return (trimInlinesF $ mconcat l, e)
@@ -499,9 +499,10 @@ paraUntil end = do
first (fmap B.para) <$> paraContentsUntil end
noteMarker :: PandocMonad m => MuseParser m String
-noteMarker = try $ do
- char '['
- (:) <$> oneOf "123456789" <*> manyTill digit (char ']')
+noteMarker = try $ (:)
+ <$ char '['
+ <*> oneOf "123456789"
+ <*> manyTill digit (char ']')
-- Amusewiki version of note
-- Parsing is similar to list item, except that note marker is used instead of list marker
@@ -541,27 +542,15 @@ emacsNoteBlock = try $ do
-- Verse markup
--
-lineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-lineVerseLine = try $ do
- string "> "
- indent <- many (char ' ' >> pure '\160')
- let indentEl = if null indent then mempty else B.str indent
- rest <- manyTill (choice inlineList) eol
- return $ trimInlinesF $ mconcat (pure indentEl : rest)
-
-blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-blanklineVerseLine = try $ do
- char '>'
- blankline
- pure mempty
-
-- | Parse a line block indicated by @\'>\'@ characters.
lineBlock :: PandocMonad m => MuseParser m (F Blocks)
lineBlock = try $ do
- many spaceChar
- col <- sourceColumn <$> getPosition
- lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
+ indent <- getIndent
+ lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
return $ B.lineBlock <$> sequence lns
+ where
+ blankVerseLine = try $ mempty <$ char '>' <* blankline
+ nonblankVerseLine = try (string "> ") *> verseLine
-- *** List parsers
@@ -573,7 +562,7 @@ bulletListItemsUntil indent end = try $ do
char '-'
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
return (x:xs, e)
-- | Parse a bullet list.
@@ -581,19 +570,9 @@ bulletListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
bulletListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guard $ indent /= 0
- (items, e) <- bulletListItemsUntil indent end
- return (B.bulletList <$> sequence items, e)
-
--- | Parses an ordered list marker and returns list attributes.
-anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes
-anyMuseOrderedListMarker = do
- (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
- char '.'
- return (start, style, Period)
+ first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end
museOrderedListMarker :: PandocMonad m
=> ListNumberStyle
@@ -620,7 +599,7 @@ orderedListItemsUntil indent style end =
pos <- getPosition
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
return (x:xs, e)
-- | Parse an ordered list.
@@ -628,14 +607,12 @@ orderedListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
orderedListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guard $ indent /= 0
- p@(_, style, _) <- anyMuseOrderedListMarker
- guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
- (items, e) <- orderedListItemsUntil indent style end
- return (B.orderedListWith p <$> sequence items, e)
+ (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
+ char '.'
+ first (fmap (B.orderedListWith (start, style, Period)) . sequence)
+ <$> orderedListItemsUntil indent style end
descriptionsUntil :: PandocMonad m
=> Int
@@ -644,7 +621,7 @@ descriptionsUntil :: PandocMonad m
descriptionsUntil indent end = do
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
return (x:xs, e)
definitionListItemsUntil :: PandocMonad m
@@ -656,8 +633,8 @@ definitionListItemsUntil indent end =
where
continuation = try $ do
pos <- getPosition
- term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::")
- (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end))
+ term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::")
+ (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end))
let xx = (,) <$> term <*> sequence x
return (xx:xs, e)
@@ -666,9 +643,7 @@ definitionListUntil :: PandocMonad m
=> MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
definitionListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse
first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
@@ -713,7 +688,7 @@ museAppendElement element tbl =
tableCell :: PandocMonad m => MuseParser m (F Blocks)
tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
- where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol
+ where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol
tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
tableElements = sequence <$> (tableParseElement `sepEndBy1` eol)
@@ -735,11 +710,10 @@ tableParseElement = tableParseHeader
tableParseRow :: PandocMonad m
=> Int -- ^ Number of separator characters
-> MuseParser m (F [Blocks])
-tableParseRow n = try $ do
- fields <- tableCell `sepBy2` fieldSep
- return $ sequence fields
- where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
- fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+tableParseRow n = try $
+ sequence <$> (tableCell `sepBy2` fieldSep)
+ where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p)
+ fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline))
-- | Parse a table header row.
tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
@@ -755,53 +729,51 @@ tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3
-- | Parse table caption.
tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
-tableParseCaption = try $ do
- many spaceChar
- string "|+"
- fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
+tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat
+ <$ many spaceChar
+ <* string "|+"
+ <*> many1Till inline (try $ string "+|")
-- ** Inline parsers
-inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
-inlineList = [ whitespace
- , br
- , anchor
- , footnote
- , strong
- , strongTag
- , emph
- , emphTag
- , underlined
- , superscriptTag
- , subscriptTag
- , strikeoutTag
- , verbatimTag
- , classTag
- , nbsp
- , link
- , code
- , codeTag
- , mathTag
- , inlineLiteralTag
- , str
- , symbol
- ]
+inline' :: PandocMonad m => MuseParser m (F Inlines)
+inline' = whitespace
+ <|> br
+ <|> anchor
+ <|> footnote
+ <|> strong
+ <|> strongTag
+ <|> emph
+ <|> emphTag
+ <|> underlined
+ <|> superscriptTag
+ <|> subscriptTag
+ <|> strikeoutTag
+ <|> verbatimTag
+ <|> classTag
+ <|> nbsp
+ <|> linkOrImage
+ <|> code
+ <|> codeTag
+ <|> mathTag
+ <|> inlineLiteralTag
+ <|> str
+ <|> symbol
+ <?> "inline"
inline :: PandocMonad m => MuseParser m (F Inlines)
-inline = endline <|> choice inlineList <?> "inline"
+inline = endline <|> inline'
-- | Parse a soft break.
endline :: PandocMonad m => MuseParser m (F Inlines)
-endline = try $ do
- newline
- notFollowedBy blankline
- return $ return B.softbreak
+endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline
parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ do
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- char '#'
- (:) <$> letter <*> many (letter <|> digit <|> char '-')
+parseAnchor = try $ (:)
+ <$ firstColumn
+ <* char '#'
+ <*> letter
+ <*> many (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -812,7 +784,7 @@ anchor = try $ do
-- | Parse a footnote reference.
footnote :: PandocMonad m => MuseParser m (F Inlines)
footnote = try $ do
- inLink <- museInLink <$> getState
+ inLink <- asks museInLink
guard $ not inLink
ref <- noteMarker
return $ do
@@ -825,33 +797,38 @@ footnote = try $ do
return $ B.note contents'
whitespace :: PandocMonad m => MuseParser m (F Inlines)
-whitespace = try $ do
- skipMany1 spaceChar
- return $ return B.space
+whitespace = try $ pure B.space <$ skipMany1 spaceChar
-- | Parse @\<br>@ tag.
br :: PandocMonad m => MuseParser m (F Inlines)
-br = try $ do
- string "<br>"
- return $ return B.linebreak
+br = try $ pure B.linebreak <$ string "<br>"
emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
emphasisBetween c = try $ enclosedInlines c c
+-- | Parses material enclosed between start and end parsers.
+enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
+ -> ParserT s st m end -- ^ end parser
+ -> ParserT s st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT s st m [a]
+enclosed start end parser = try $
+ start *> notFollowedBy spaceChar *> many1Till parser end
+
enclosedInlines :: (PandocMonad m, Show a, Show b)
=> MuseParser m a
-> MuseParser m b
-> MuseParser m (F Inlines)
-enclosedInlines start end = try $
- trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
+enclosedInlines start end = try $ trimInlinesF . mconcat
+ <$> enclosed (atStart start) end inline
+ <* notFollowedBy (satisfy isAlphaNum)
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
=> String -- ^ Tag name
-> MuseParser m (F Inlines)
-inlineTag tag = try $ do
- htmlTag (~== TagOpen tag [])
- mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag))
+inlineTag tag = try $ mconcat
+ <$ openTag tag
+ <*> manyTill inline (closeTag tag)
-- | Parse strong inline markup, indicated by @**@.
strong :: PandocMonad m => MuseParser m (F Inlines)
@@ -864,9 +841,9 @@ emph = fmap B.emph <$> emphasisBetween (char '*')
-- | Parse underline inline markup, indicated by @_@.
-- Supported only in Emacs Muse mode, not Text::Amuse.
underlined :: PandocMonad m => MuseParser m (F Inlines)
-underlined = do
- guardDisabled Ext_amuse -- Supported only by Emacs Muse
- fmap underlineSpan <$> emphasisBetween (char '_')
+underlined = fmap underlineSpan
+ <$ guardDisabled Ext_amuse -- Supported only by Emacs Muse
+ <*> emphasisBetween (char '_')
-- | Parse @\<strong>@ tag.
strongTag :: PandocMonad m => MuseParser m (F Inlines)
@@ -890,21 +867,20 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
-- | Parse @\<verbatim>@ tag.
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
-verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
+verbatimTag = return . B.text
+ <$ openTag "verbatim"
+ <*> manyTill anyChar (closeTag "verbatim")
-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" [])
- res <- manyTill inline (void $ htmlTag (~== TagClose "class"))
- let classes = maybe [] words $ lookup "name" attrs
+ classes <- maybe [] words . lookup "name" <$> openTag "class"
+ res <- manyTill inline $ closeTag "class"
return $ B.spanWith ("", classes, []) <$> mconcat res
-- | Parse "~~" as nonbreaking space.
nbsp :: PandocMonad m => MuseParser m (F Inlines)
-nbsp = try $ do
- string "~~"
- return $ return $ B.str "\160"
+nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
@@ -914,26 +890,27 @@ code = try $ do
guard $ not $ null contents
guard $ head contents `notElem` " \t\n"
guard $ last contents `notElem` " \t\n"
- notFollowedBy $ satisfy isLetter
+ notFollowedBy $ satisfy isAlphaNum
return $ return $ B.code contents
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
-codeTag = return . uncurry B.codeWith <$> htmlElement "code"
+codeTag = fmap pure $ B.codeWith
+ <$> (htmlAttrToPandoc <$> openTag "code")
+ <*> manyTill anyChar (closeTag "code")
-- | Parse @\<math>@ tag.
-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
-mathTag = return . B.math . snd <$> htmlElement "math"
+mathTag = return . B.math
+ <$ openTag "math"
+ <*> manyTill anyChar (closeTag "math")
-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
-inlineLiteralTag =
- (return . rawInline) <$> htmlElement "literal"
- where
- -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
- rawInline (attrs, content) = B.rawInline (format attrs) content
+inlineLiteralTag = try $ fmap pure $ B.rawInline
+ <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ <*> manyTill anyChar (closeTag "literal")
str :: PandocMonad m => MuseParser m (F Inlines)
str = return . B.str <$> many1 alphaNum <* updateLastStrPos
@@ -942,29 +919,58 @@ symbol :: PandocMonad m => MuseParser m (F Inlines)
symbol = return . B.str <$> count 1 nonspaceChar
-- | Parse a link or image.
-link :: PandocMonad m => MuseParser m (F Inlines)
-link = try $ do
- st <- getState
- guard $ not $ museInLink st
- setState $ st{ museInLink = True }
- (url, content) <- linkText
- updateState (\state -> state { museInLink = False })
- return $ case stripPrefix "URL:" url of
- Nothing -> if isImageUrl url
- then B.image url "" <$> fromMaybe (return mempty) content
- else B.link url "" <$> fromMaybe (return $ B.str url) content
- Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content
- where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
- imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
- isImageUrl = (`elem` imageExtensions) . takeExtension
+linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
+linkOrImage = try $ do
+ inLink <- asks museInLink
+ guard $ not inLink
+ local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link)
linkContent :: PandocMonad m => MuseParser m (F Inlines)
-linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]")
+linkContent = trimInlinesF . mconcat
+ <$ char '['
+ <*> manyTill inline (char ']')
+
+-- | Parse a link starting with @URL:@
+explicitLink :: PandocMonad m => MuseParser m (F Inlines)
+explicitLink = try $ do
+ string "[[URL:"
+ url <- manyTill anyChar $ char ']'
+ content <- option (pure $ B.str url) linkContent
+ char ']'
+ return $ B.link url "" <$> content
-linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines))
-linkText = do
+image :: PandocMonad m => MuseParser m (F Inlines)
+image = try $ do
+ string "[["
+ (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+ content <- option mempty linkContent
+ char ']'
+ let widthAttr = case align of
+ Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
+ _ -> maybeToList (("width",) . (++ "%") <$> width)
+ let alignClass = case align of
+ Just 'r' -> ["align-right"]
+ Just 'l' -> ["align-left"]
+ Just 'f' -> []
+ _ -> []
+ return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+ where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ imageExtension = choice (try . string <$> imageExtensions)
+ imageExtensionAndOptions = do
+ ext <- imageExtension
+ (width, align) <- option (Nothing, Nothing) imageAttrs
+ return (ext, width, align)
+ imageAttrs = (,)
+ <$ many1 spaceChar
+ <*> optionMaybe (many1 digit)
+ <* many spaceChar
+ <*> optionMaybe (oneOf "rlf")
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
string "[["
url <- manyTill anyChar $ char ']'
content <- optionMaybe linkContent
char ']'
- return (url, content)
+ return $ B.link url "" <$> fromMaybe (return $ B.str url) content
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
index d3db3a9e2..9e8221248 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -100,7 +100,7 @@ liftA fun a = a >>^ fun
-- | Duplicate a value to subsequently feed it into different arrows.
-- Can almost always be replaced with '(&&&)', 'keepingTheValue',
-- or even '(|||)'.
--- Aequivalent to
+-- Equivalent to
-- > returnA &&& returnA
duplicate :: (Arrow a) => a b (b,b)
duplicate = arr $ join (,)
@@ -114,7 +114,7 @@ infixr 2 >>%
-- | Duplicate a value and apply an arrow to the second instance.
--- Aequivalent to
+-- Equivalent to
-- > \a -> duplicate >>> second a
-- or
-- > \a -> returnA &&& a
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 6d96897aa..e76bbf5cf 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -50,7 +50,7 @@ class (Eq nsID, Ord nsID) => NameSpaceID nsID where
getNamespaceID :: NameSpaceIRI
-> NameSpaceIRIs nsID
-> Maybe (NameSpaceIRIs nsID, nsID)
- -- | Given a namespace id, lookup its IRI. May be overriden for performance.
+ -- | Given a namespace id, lookup its IRI. May be overridden for performance.
getIRI :: nsID
-> NameSpaceIRIs nsID
-> Maybe NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 616d9290b..45c6cd58c 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -61,7 +61,7 @@ import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
--- | Aequivalent to
+-- | Equivalent to
-- > foldr (.) id
-- where '(.)' are 'id' are the ones from "Control.Category"
-- and 'foldr' is the one from "Data.Foldable".
@@ -72,7 +72,7 @@ import Data.Maybe
composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
composition = F.foldr (<<<) Cat.id
--- | Aequivalent to
+-- | Equivalent to
-- > foldr (flip (.)) id
-- where '(.)' are 'id' are the ones from "Control.Category"
-- and 'foldr' is the one from "Data.Foldable".
@@ -133,9 +133,7 @@ class Lookupable a where
-- can be used directly in almost any case.
readLookupables :: (Lookupable a) => String -> [(a,String)]
readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- let result = lookup word lookupTable,
- isJust result,
- let Just a = result
+ a <- maybeToList (lookup word lookupTable)
]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 81392e16b..2327ea908 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -261,7 +261,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
-- The resulting converter even behaves like an identity converter on the
-- value level.
--
--- Aequivalent to
+-- Equivalent to
--
-- > \v x a -> convertingExtraState v (returnV x >>> a)
--
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index e0444559b..6a1682829 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -138,7 +138,7 @@ fontPitchReader = executeIn NsOffice "font-face-decls" (
lookupDefaultingAttr NsStyle "font-pitch"
))
>>?^ ( M.fromList . foldl accumLegalPitches [] )
- )
+ ) `ifFailedDo` (returnV (Right M.empty))
where accumLegalPitches ls (Nothing,_) = ls
accumLegalPitches ls (Just n,p) = (n,p):ls
@@ -342,7 +342,7 @@ instance Read XslUnit where
readsPrec _ _ = []
-- | Rough conversion of measures into millimetres.
--- Pixels and em's are actually implementation dependant/relative measures,
+-- Pixels and em's are actually implementation dependent/relative measures,
-- so I could not really easily calculate anything exact here even if I wanted.
-- But I do not care about exactness right now, as I only use measures
-- to determine if a paragraph is "indented" or not.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 888cd9307..1c52c3477 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -40,7 +40,7 @@ import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
- originalLang, translateLang)
+ originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
@@ -314,9 +314,6 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsCode :: [(String, String)] -> Bool
- exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
-
exportsResults :: [(String, String)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
@@ -743,7 +740,7 @@ latexEnd envName = try $
--
--- Footnote defintions
+-- Footnote definitions
--
noteBlock :: PandocMonad m => OrgParser m (F Blocks)
noteBlock = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c9465581a..7d55892fe 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,8 +16,7 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2018 Albert Krewinkel
@@ -45,7 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import qualified Data.Map as Map
+import qualified Data.Set as Set
import qualified Text.Pandoc.Builder as B
--
@@ -60,7 +58,7 @@ documentTree :: PandocMonad m
documentTree blocks inline = do
initialBlocks <- blocks
headlines <- sequence <$> manyTill (headline blocks inline 1) eof
- title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState
+ title <- fmap docTitle . orgStateMeta <$> getState
return $ do
headlines' <- headlines
initialBlocks' <- initialBlocks
@@ -70,19 +68,11 @@ documentTree blocks inline = do
, headlineTodoMarker = Nothing
, headlineText = B.fromList title'
, headlineTags = mempty
+ , headlinePlanning = emptyPlanning
, headlineProperties = mempty
, headlineContents = initialBlocks'
, headlineChildren = headlines'
}
- where
- getTitle :: Map.Map String MetaValue -> [Inline]
- getTitle metamap =
- case Map.lookup "title" metamap of
- Just (MetaInlines inlns) -> inlns
- _ -> []
-
-newtype Tag = Tag { fromTag :: String }
- deriving (Show, Eq)
-- | Create a tag containing the given string.
toTag :: String -> Tag
@@ -117,6 +107,7 @@ data Headline = Headline
, headlineTodoMarker :: Maybe TodoMarker
, headlineText :: Inlines
, headlineTags :: [Tag]
+ , headlinePlanning :: PlanningInfo -- ^ subtree planning information
, headlineProperties :: Properties
, headlineContents :: Blocks
, headlineChildren :: [Headline]
@@ -136,6 +127,7 @@ headline blocks inline lvl = try $ do
title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
tags <- option [] headerTags
newline
+ planning <- option emptyPlanning planningInfo
properties <- option mempty propertiesDrawer
contents <- blocks
children <- many (headline blocks inline (level + 1))
@@ -148,6 +140,7 @@ headline blocks inline lvl = try $ do
, headlineTodoMarker = todoKw
, headlineText = title'
, headlineTags = tags
+ , headlinePlanning = planning
, headlineProperties = properties
, headlineContents = contents'
, headlineChildren = children'
@@ -158,22 +151,27 @@ headline blocks inline lvl = try $ do
headerTags :: Monad m => OrgParser m [Tag]
headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ let tag = orgTagWord <* char ':'
in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-headlineToBlocks hdln@Headline {..} = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+headlineToBlocks hdln = do
+ maxLevel <- getExportSetting exportHeadlineLevels
+ let tags = headlineTags hdln
+ let text = headlineText hdln
+ let level = headlineLevel hdln
+ shouldNotExport <- hasDoNotExportTag tags
case () of
- _ | any isNoExportTag headlineTags -> return mempty
- _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
- _ | isCommentTitle headlineText -> return mempty
- _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
- _ | otherwise -> headlineToHeaderWithContents hdln
+ _ | shouldNotExport -> return mempty
+ _ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln
+ _ | isCommentTitle text -> return mempty
+ _ | maxLevel <= level -> headlineToHeaderWithList hdln
+ _ | otherwise -> headlineToHeaderWithContents hdln
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
+hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool
+hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState
+ where containsExcludedTag s = any (`Set.member` s) tags
isArchiveTag :: Tag -> Bool
isArchiveTag = (== toTag "ARCHIVE")
@@ -182,8 +180,9 @@ isArchiveTag = (== toTag "ARCHIVE")
-- FIXME: This accesses builder internals not intended for use in situations
-- like these. Replace once keyword parsing is supported.
isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _ = False
+isCommentTitle inlns = case B.toList inlns of
+ (Str "COMMENT":_) -> True
+ _ -> False
archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
archivedHeadlineToBlocks hdln = do
@@ -194,17 +193,23 @@ archivedHeadlineToBlocks hdln = do
ArchivedTreesHeadlineOnly -> headlineToHeader hdln
headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithList hdln@Headline {..} = do
+headlineToHeaderWithList hdln = do
maxHeadlineLevels <- getExportSetting exportHeadlineLevels
header <- headlineToHeader hdln
- listElements <- mapM headlineToBlocks headlineChildren
+ listElements <- mapM headlineToBlocks (headlineChildren hdln)
+ planningBlock <- planningToBlock (headlinePlanning hdln)
let listBlock = if null listElements
then mempty
else B.orderedList listElements
- let headerText = if maxHeadlineLevels == headlineLevel
+ let headerText = if maxHeadlineLevels == headlineLevel hdln
then header
else flattenHeader header
- return $ headerText <> headlineContents <> listBlock
+ return . mconcat $
+ [ headerText
+ , headlineContents hdln
+ , planningBlock
+ , listBlock
+ ]
where
flattenHeader :: Blocks -> Blocks
flattenHeader blks =
@@ -213,27 +218,28 @@ headlineToHeaderWithList hdln@Headline {..} = do
_ -> mempty
headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithContents hdln@Headline {..} = do
+headlineToHeaderWithContents hdln = do
header <- headlineToHeader hdln
- childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren
- return $ header <> headlineContents <> childrenBlocks
+ planningBlock <- planningToBlock (headlinePlanning hdln)
+ childrenBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren hdln)
+ return $ header <> planningBlock <> headlineContents hdln <> childrenBlocks
headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeader Headline {..} = do
+headlineToHeader hdln = do
exportTodoKeyword <- getExportSetting exportWithTodoKeywords
exportTags <- getExportSetting exportWithTags
let todoText = if exportTodoKeyword
- then case headlineTodoMarker of
+ then case headlineTodoMarker hdln of
Just kw -> todoKeywordToInlines kw <> B.space
Nothing -> mempty
else mempty
- let text = todoText <> headlineText <>
+ let text = todoText <> headlineText hdln <>
if exportTags
- then tagsToInlines headlineTags
+ then tagsToInlines (headlineTags hdln)
else mempty
- let propAttr = propertiesToAttr headlineProperties
- attr <- registerHeader propAttr headlineText
- return $ B.headerWith attr headlineLevel text
+ let propAttr = propertiesToAttr (headlineProperties hdln)
+ attr <- registerHeader propAttr (headlineText hdln)
+ return $ B.headerWith attr (headlineLevel hdln) text
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
@@ -277,9 +283,60 @@ tagsToInlines tags =
tagSpan :: Tag -> Inlines -> Inlines
tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)])
+-- | Render planning info as a block iff the respective export setting is
+-- enabled.
+planningToBlock :: Monad m => PlanningInfo -> OrgParser m Blocks
+planningToBlock planning = do
+ includePlanning <- getExportSetting exportWithPlanning
+ return $
+ if includePlanning
+ then B.plain . mconcat . intersperse B.space . filter (/= mempty) $
+ [ datumInlines planningClosed "CLOSED"
+ , datumInlines planningDeadline "DEADLINE"
+ , datumInlines planningScheduled "SCHEDULED"
+ ]
+ else mempty
+ where
+ datumInlines field name =
+ case field planning of
+ Nothing -> mempty
+ Just time -> B.strong (B.str name <> B.str ":")
+ <> B.space
+ <> B.emph (B.str time)
+
+-- | An Org timestamp, including repetition marks. TODO: improve
+type Timestamp = String
+
+timestamp :: Monad m => OrgParser m Timestamp
+timestamp = try $ do
+ openChar <- oneOf "<["
+ let isActive = openChar == '<'
+ let closeChar = if isActive then '>' else ']'
+ content <- many1Till anyChar (char closeChar)
+ return (openChar : content ++ [closeChar])
+
+-- | Planning information for a subtree/headline.
+data PlanningInfo = PlanningInfo
+ { planningClosed :: Maybe Timestamp
+ , planningDeadline :: Maybe Timestamp
+ , planningScheduled :: Maybe Timestamp
+ }
+emptyPlanning :: PlanningInfo
+emptyPlanning = PlanningInfo Nothing Nothing Nothing
-
+-- | Read a single planning-related and timestamped line.
+planningInfo :: Monad m => OrgParser m PlanningInfo
+planningInfo = try $ do
+ updaters <- many1 planningDatum <* skipSpaces <* newline
+ return $ foldr ($) emptyPlanning updaters
+ where
+ planningDatum = skipSpaces *> choice
+ [ updateWith (\s p -> p { planningScheduled = Just s}) "SCHEDULED"
+ , updateWith (\d p -> p { planningDeadline = Just d}) "DEADLINE"
+ , updateWith (\c p -> p { planningClosed = Just c}) "CLOSED"
+ ]
+ updateWith fn cs = fn <$> (string cs *> char ':' *> skipSpaces *> timestamp)
-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
-- within.
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index d02eb37c5..f79ee0d64 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -69,7 +69,7 @@ exportSetting = choice
, integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
, ignoredSetting "inline"
, ignoredSetting "num"
- , ignoredSetting "p"
+ , booleanSetting "p" (\val es -> es { exportWithPlanning = val })
, ignoredSetting "pri"
, ignoredSetting "prop"
, ignoredSetting "stat"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 7d1568b80..a5335ca57 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
- originalLang, translateLang)
+ originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
import qualified Text.Pandoc.Builder as B
@@ -510,7 +510,7 @@ anchor = try $ do
<* string ">>"
<* skipSpaces
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
solidify :: String -> String
@@ -525,11 +525,13 @@ inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
- opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+ opts <- option [] $ try (enclosedByPair '[' ']' inlineBlockOption)
+ <|> (mempty <$ string "[]")
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ returnF $ (if exportsCode opts then codeInlineBlck else mempty)
where
inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
inlineBlockOption = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 965e33d94..cad1d7123 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -52,6 +52,7 @@ import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -158,6 +159,7 @@ optionLine = try $ do
"seq_todo" -> todoSequence >>= updateState . registerTodoSequence
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence
"macro" -> macroDefinition >>= updateState . registerMacro
+ "exclude_tags" -> excludedTagList >>= updateState . setExcludedTags
"pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
@@ -190,6 +192,18 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+excludedTagList :: Monad m => OrgParser m [Tag]
+excludedTagList = do
+ skipSpaces
+ map Tag <$> many (orgTagWord <* skipSpaces) <* newline
+
+setExcludedTags :: [Tag] -> OrgParserState -> OrgParserState
+setExcludedTags tagList st =
+ let finalSet = if orgStateExcludedTagsChanged st
+ then foldr Set.insert (orgStateExcludedTags st) tagList
+ else Set.fromList tagList
+ in st { orgStateExcludedTags = finalSet, orgStateExcludedTagsChanged = True}
+
setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
setEmphasisPreChar csMb st =
let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 4cb5bb626..59478256f 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, defaultOrgParserState
, OrgParserLocal (..)
, OrgNoteRecord
+ , Tag(..)
, HasReaderOptions (..)
, HasQuoteContext (..)
, HasMacros (..)
@@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord]
type OrgLinkFormatters = M.Map String (String -> String)
-- | Macro expander function
type MacroExpander = [String] -> String
+-- | Tag
+newtype Tag = Tag { fromTag :: String }
+ deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
data TodoState = Todo | Done
@@ -113,6 +117,8 @@ data OrgParserState = OrgParserState
-- specified here.
, orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis
, orgStateEmphasisNewlines :: Maybe Int
+ , orgStateExcludedTags :: Set.Set Tag
+ , orgStateExcludedTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
, orgStateHeaderMap :: M.Map Inlines String
, orgStateIdentifiers :: Set.Set String
@@ -183,6 +189,8 @@ defaultOrgParserState = OrgParserState
, orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing
, orgStateExportSettings = def
+ , orgStateExcludedTags = Set.singleton $ Tag "noexport"
+ , orgStateExcludedTagsChanged = False
, orgStateHeaderMap = M.empty
, orgStateIdentifiers = Set.empty
, orgStateIncludeFiles = []
@@ -260,6 +268,7 @@ data ExportSettings = ExportSettings
, exportWithAuthor :: Bool -- ^ Include author in final meta-data
, exportWithCreator :: Bool -- ^ Include creator in final meta-data
, exportWithEmail :: Bool -- ^ Include email in final meta-data
+ , exportWithPlanning :: Bool -- ^ Keep planning info after headlines
, exportWithTags :: Bool -- ^ Keep tags as part of headlines
, exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
}
@@ -280,6 +289,7 @@ defaultExportSettings = ExportSettings
, exportWithAuthor = True
, exportWithCreator = True
, exportWithEmail = True
+ , exportWithPlanning = False
, exportWithTags = True
, exportWithTodoKeywords = True
}
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index e014de65e..52a346e36 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing
, orgArgKey
, orgArgWord
, orgArgWordChar
+ , orgTagWord
+ , orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
, many1Till
@@ -137,14 +139,13 @@ anyLine =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
+-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
+-- allowed before emphasised text.
parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ updateState $ \s -> s { orgStateLastPreCharPos = Nothing }
return result
-- | Skip one or more tab or space characters.
@@ -221,3 +222,9 @@ orgArgWord = many1 orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
+
+orgTagWord :: Monad m => OrgParser m String
+orgTagWord = many1 orgTagWordChar
+
+orgTagWordChar :: Monad m => OrgParser m Char
+orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 17fe34738..71d1dd517 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Org.Shared
, isImageFilename
, originalLang
, translateLang
+ , exportsCode
) where
import Prelude
@@ -96,3 +97,6 @@ translateLang cs =
"sh" -> "bash"
"sqlite" -> "sql"
_ -> cs
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 71a38cf82..28fa7b83e 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,7 +37,7 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
nub, sort, transpose, union)
import qualified Data.Map as M
@@ -172,6 +172,7 @@ parseRST = do
docMinusKeys <- concat <$>
manyTill (referenceKey <|> anchorDef <|>
noteBlock <|> citationBlock <|>
+ (snd <$> withRaw comment) <|>
headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
@@ -1089,7 +1090,7 @@ referenceKey = do
targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
- optional newline
+ optional $ try $ newline >> notFollowedBy blankline
contents <- trim <$>
many1 (satisfy (/='\n')
<|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
@@ -1313,19 +1314,24 @@ table = gridTable False <|> simpleTable False <|>
inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws
- , whitespace
, link
- , str
- , endline
, strong
, emph
, code
, subst
, interpretedRole
- , smart
- , hyphens
- , escapedChar
- , symbol ] <?> "inline"
+ , inlineContent ] <?> "inline"
+
+-- strings, spaces and other characters that can appear either by
+-- themselves or within inline markup
+inlineContent :: PandocMonad m => RSTParser m Inlines
+inlineContent = choice [ whitespace
+ , str
+ , endline
+ , smart
+ , hyphens
+ , escapedChar
+ , symbol ] <?> "inline content"
parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
@@ -1368,11 +1374,11 @@ atStart p = do
emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$>
- enclosed (atStart $ char '*') (char '*') inline
+ enclosed (atStart $ char '*') (char '*') inlineContent
strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$>
- enclosed (atStart $ string "**") (try $ string "**") inline
+ enclosed (atStart $ string "**") (try $ string "**") inlineContent
-- Note, this doesn't precisely implement the complex rule in
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
@@ -1380,7 +1386,6 @@ strong = B.strong . trimInlines . mconcat <$>
--
-- TODO:
-- - Classes are silently discarded in addNewRole
--- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
@@ -1390,12 +1395,12 @@ interpretedRole = try $ do
renderRole :: PandocMonad m
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
- "sup" -> return $ B.superscript $ B.str contents
- "superscript" -> return $ B.superscript $ B.str contents
- "sub" -> return $ B.subscript $ B.str contents
- "subscript" -> return $ B.subscript $ B.str contents
- "emphasis" -> return $ B.emph $ B.str contents
- "strong" -> return $ B.strong $ B.str contents
+ "sup" -> return $ B.superscript $ treatAsText contents
+ "superscript" -> return $ B.superscript $ treatAsText contents
+ "sub" -> return $ B.subscript $ treatAsText contents
+ "subscript" -> return $ B.subscript $ treatAsText contents
+ "emphasis" -> return $ B.emph $ treatAsText contents
+ "strong" -> return $ B.strong $ treatAsText contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
@@ -1406,7 +1411,7 @@ renderRole contents fmt role attr = case role of
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
- "span" -> return $ B.spanWith attr $ B.str contents
+ "span" -> return $ B.spanWith attr $ treatAsText contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRoles <- stateRstCustomRoles <$> getState
@@ -1414,14 +1419,20 @@ renderRole contents fmt role attr = case role of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> -- undefined role
- return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
+ return $ B.codeWith ("",["interpreted-text"],[("role",role)])
+ contents
where
- titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+ titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ treatAsText = B.text . handleEscapes
+ handleEscapes [] = []
+ handleEscapes ('\\':' ':cs) = handleEscapes cs
+ handleEscapes ('\\':c:cs) = c : handleEscapes cs
+ handleEscapes (c:cs) = c : handleEscapes cs
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
@@ -1445,7 +1456,18 @@ roleAfter = try $ do
return (role,contents)
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
-unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
+unmarkedInterpretedText = try $ do
+ atStart (char '`')
+ contents <- mconcat <$> (many1
+ ( many1 (noneOf "`\\\n")
+ <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
+ <|> (string "\n" <* notFollowedBy blankline)
+ <|> try (string "`" <*
+ notFollowedBy (() <$ roleMarker) <*
+ lookAhead (satisfy isAlphaNum))
+ ))
+ char '`'
+ return contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@@ -1480,7 +1502,7 @@ explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
label' <- trimInlines . mconcat <$>
- manyTill (notFollowedBy (char '`') >> inline) (char '<')
+ manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
src <- trim <$> manyTill (noneOf ">\n") (char '>')
skipSpaces
string "`_"
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 1f230ae7e..c3cfedcfb 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -74,9 +74,6 @@ type TWParser = ParserT [Char] ParserState
tryMsg :: String -> TWParser m a -> TWParser m a
tryMsg msg p = try p <?> msg
-skip :: TWParser m a -> TWParser m ()
-skip parser = parser >> return ()
-
nested :: PandocMonad m => TWParser m a -> TWParser m a
nested p = do
nestlevel <- stateMaxNestingLevel <$> getState
@@ -92,7 +89,7 @@ htmlElement tag = tryMsg tag $ do
content <- manyTill anyChar (endtag <|> endofinput)
return (htmlAttrToPandoc attr, trim content)
where
- endtag = skip $ htmlTag (~== TagClose tag)
+ endtag = void $ htmlTag (~== TagClose tag)
endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
@@ -114,18 +111,15 @@ parseHtmlContentWithAttrs tag parser = do
endOfContent = try $ skipMany blankline >> skipSpaces >> eof
parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
-parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p
--
-- main parser
--
parseTWiki :: PandocMonad m => TWParser m Pandoc
-parseTWiki = do
- bs <- mconcat <$> many block
- spaces
- eof
- return $ B.doc bs
+parseTWiki =
+ B.doc . mconcat <$> many block <* spaces <* eof
--
@@ -158,7 +152,7 @@ separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalR
header :: PandocMonad m => TWParser m B.Blocks
header = tryMsg "header" $ do
string "---"
- level <- many1 (char '+') >>= return . length
+ level <- length <$> many1 (char '+')
guard $ level <= 6
classes <- option [] $ string "!!" >> return ["unnumbered"]
skipSpaces
@@ -167,11 +161,10 @@ header = tryMsg "header" $ do
return $ B.headerWith attr level content
verbatim :: PandocMonad m => TWParser m B.Blocks
-verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
- >>= return . (uncurry B.codeBlockWith)
+verbatim = uncurry B.codeBlockWith <$> (htmlElement "verbatim" <|> htmlElement "pre")
literal :: PandocMonad m => TWParser m B.Blocks
-literal = htmlElement "literal" >>= return . rawBlock
+literal = rawBlock <$> htmlElement "literal"
where
format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) content
@@ -183,7 +176,7 @@ list prefix = choice [ bulletList prefix
definitionList :: PandocMonad m => String -> TWParser m B.Blocks
definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ "
elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
return $ B.definitionList elements
where
@@ -193,7 +186,7 @@ definitionList prefix = tryMsg "definitionList" $ do
string (indent ++ "$ ") >> skipSpaces
term <- many1Till inline $ string ": "
line <- listItemLine indent $ string "$ "
- return $ (mconcat term, [line])
+ return (mconcat term, [line])
bulletList :: PandocMonad m => String -> TWParser m B.Blocks
bulletList prefix = tryMsg "bulletList" $
@@ -227,25 +220,24 @@ parseListItem prefix marker = string prefix >> marker >> listItemLine prefix mar
listItemLine :: (PandocMonad m, Show a)
=> String -> TWParser m a -> TWParser m B.Blocks
-listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
where
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation
filterSpaces = reverse . dropWhile (== ' ') . reverse
listContinuation = notFollowedBy (string prefix >> marker) >>
string " " >> lineContent
parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
- parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
- return . B.plain . mconcat
+ parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList)
nestedList = list prefix
lastNewline = try $ char '\n' <* eof
newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
table :: PandocMonad m => TWParser m B.Blocks
table = try $ do
- tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
rows <- many1 tableParseRow
return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
where
@@ -258,11 +250,11 @@ table = try $ do
tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
tableParseHeader = try $ do
char '|'
- leftSpaces <- many spaceChar >>= return . length
+ leftSpaces <- length <$> many spaceChar
char '*'
content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
char '*'
- rightSpaces <- many spaceChar >>= return . length
+ rightSpaces <- length <$> many spaceChar
optional tableEndOfRow
return (tableAlign leftSpaces rightSpaces, content)
where
@@ -283,13 +275,13 @@ tableEndOfRow :: PandocMonad m => TWParser m Char
tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
-tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+tableColumnContent end = (B.plain . mconcat) <$> manyTill content (lookAhead $ try end)
where
content = continuation <|> inline
continuation = try $ char '\\' >> newline >> return mempty
blockQuote :: PandocMonad m => TWParser m B.Blocks
-blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+blockQuote = (B.blockQuote . mconcat) <$> parseHtmlContent "blockquote" block
noautolink :: PandocMonad m => TWParser m B.Blocks
noautolink = do
@@ -300,15 +292,15 @@ noautolink = do
setState $ st{ stateAllowLinks = True }
return $ mconcat blocks
where
- parseContent = parseFromString' $ many $ block
+ parseContent = parseFromString' $ many block
para :: PandocMonad m => TWParser m B.Blocks
-para = many1Till inline endOfParaElement >>= return . result . mconcat
+para = (result . mconcat) <$> many1Till inline endOfParaElement
where
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
endOfInput = try $ skipMany blankline >> skipSpaces >> eof
endOfPara = try $ blankline >> skipMany1 blankline
- newBlockElement = try $ blankline >> skip blockElements
+ newBlockElement = try $ blankline >> void blockElements
result content = if F.all (==Space) content
then mempty
else B.para $ B.trimInlines content
@@ -340,7 +332,7 @@ inline = choice [ whitespace
] <?> "inline"
whitespace :: PandocMonad m => TWParser m B.Inlines
-whitespace = (lb <|> regsp) >>= return
+whitespace = lb <|> regsp
where lb = try $ skipMany spaceChar >> linebreak >> return B.space
regsp = try $ skipMany1 spaceChar >> return B.space
@@ -362,13 +354,13 @@ enclosed :: (Monoid b, PandocMonad m, Show a)
=> TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
enclosed sep p = between sep (try $ sep <* endMarker) p
where
- endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endMarker = lookAhead $ void endSpace <|> void (oneOf ".,!?:)|") <|> eof
endSpace = (spaceChar <|> newline) >> return B.space
macro :: PandocMonad m => TWParser m B.Inlines
macro = macroWithParameters <|> withoutParameters
where
- withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ withoutParameters = emptySpan <$> enclosed (char '%') (const macroName)
emptySpan name = buildSpan name [] mempty
macroWithParameters :: PandocMonad m => TWParser m B.Inlines
@@ -393,13 +385,13 @@ macroName = do
return (first:rest)
attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
- return . foldr (either mkContent mkKvs) ([], [])
+attributes = foldr (either mkContent mkKvs) ([], [])
+ <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}')
where
spnl = skipMany (spaceChar <|> newline)
mkContent c ([], kvs) = (c, kvs)
mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
- mkKvs kv (cont, rest) = (cont, (kv : rest))
+ mkKvs kv (cont, rest) = (cont, kv : rest)
attribute :: PandocMonad m => TWParser m (Either String (String, String))
attribute = withKey <|> withoutKey
@@ -407,52 +399,50 @@ attribute = withKey <|> withoutKey
withKey = try $ do
key <- macroName
char '='
- parseValue False >>= return . (curry Right key)
- withoutKey = try $ parseValue True >>= return . Left
- parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ curry Right key <$> parseValue False
+ withoutKey = try $ Left <$> parseValue True
+ parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces)
withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
withoutQuotes allowSpaces
- | allowSpaces == True = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
+ | allowSpaces = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
nestedInlines :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
where
- innerSpace = try $ whitespace <* (notFollowedBy end)
+ innerSpace = try $ whitespace <* notFollowedBy end
nestedInline = notFollowedBy whitespace >> nested inline
strong :: PandocMonad m => TWParser m B.Inlines
-strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+strong = try $ B.strong <$> enclosed (char '*') nestedInlines
strongHtml :: PandocMonad m => TWParser m B.Inlines
-strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
- >>= return . B.strong . mconcat
+strongHtml = B.strong . mconcat <$> (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
strongAndEmph :: PandocMonad m => TWParser m B.Inlines
-strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+strongAndEmph = try $ B.emph . B.strong <$> enclosed (string "__") nestedInlines
emph :: PandocMonad m => TWParser m B.Inlines
-emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+emph = try $ B.emph <$> enclosed (char '_') nestedInlines
emphHtml :: PandocMonad m => TWParser m B.Inlines
-emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
- >>= return . B.emph . mconcat
+emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
nestedString :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> (count 1 nonspaceChar)
+nestedString end = innerSpace <|> count 1 nonspaceChar
where
innerSpace = try $ many1 spaceChar <* notFollowedBy end
boldCode :: PandocMonad m => TWParser m B.Inlines
-boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString
htmlComment :: PandocMonad m => TWParser m B.Inlines
htmlComment = htmlTag isCommentTag >> return mempty
code :: PandocMonad m => TWParser m B.Inlines
-code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString
codeHtml :: PandocMonad m => TWParser m B.Inlines
codeHtml = do
@@ -464,7 +454,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- parseLink
- guard $ checkLink (head $ reverse url)
+ guard $ checkLink (last url)
return $ makeLink (text, url)
where
parseLink = notFollowedBy nop >> (uri <|> emailAddress)
@@ -474,17 +464,17 @@ autoLink = try $ do
| otherwise = isAlphaNum c
str :: PandocMonad m => TWParser m B.Inlines
-str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
nop :: PandocMonad m => TWParser m B.Inlines
-nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+nop = try $ (void exclamation <|> void nopTag) >> followContent
where
exclamation = char '!'
nopTag = stringAnyCase "<nop>"
- followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+ followContent = B.str . fromEntities <$> many1 nonspaceChar
symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = count 1 nonspaceChar >>= return . B.str
+symbol = B.str <$> count 1 nonspaceChar
smart :: PandocMonad m => TWParser m B.Inlines
smart = do
@@ -498,17 +488,16 @@ smart = do
singleQuoted :: PandocMonad m => TWParser m B.Inlines
singleQuoted = try $ do
singleQuoteStart
- withQuoteContext InSingleQuote $
- many1Till inline singleQuoteEnd >>=
- (return . B.singleQuoted . B.trimInlines . mconcat)
+ withQuoteContext InSingleQuote
+ (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
doubleQuoted :: PandocMonad m => TWParser m B.Inlines
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ withQuoteContext InDoubleQuote (doubleQuoteEnd >>
return (B.doubleQuoted $ B.trimInlines contents))
- <|> (return $ (B.str "\8220") B.<> contents)
+ <|> return (B.str "\8220" B.<> contents)
link :: PandocMonad m => TWParser m B.Inlines
link = try $ do
@@ -527,5 +516,5 @@ linkText = do
char ']'
return (url, "", content)
where
- linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent
parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index bc3bcaf26..4b65be347 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -540,7 +540,7 @@ wordChunk = try $ do
str :: PandocMonad m => ParserT [Char] ParserState m Inlines
str = do
baseStr <- hyphenedWords
- -- RedCloth compliance : if parsed word is uppercase and immediatly
+ -- RedCloth compliance : if parsed word is uppercase and immediately
-- followed by parens, parens content is unconditionally word acronym
fullStr <- option baseStr $ try $ do
guard $ all isUpper baseStr
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index 5c7507248..8458b05e5 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -22,6 +22,7 @@ import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.Foldable as F
+import Data.List (dropWhileEnd)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
@@ -166,7 +167,7 @@ table = try $ do
-- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows
return $B.simpleTable (headers rows) rows
where
- -- The headers are as many empty srings as the number of columns
+ -- The headers are as many empty strings as the number of columns
-- in the first row
headers rows = map (B.plain . B.str) $replicate (length $ head rows) ""
@@ -319,7 +320,7 @@ listItem = choice [
bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
bulletItem = try $ do
prefix <- many1 $ char '*'
- many1 $ char ' '
+ many $ char ' '
content <- listItemLine (length prefix)
return (LN Bullet (length prefix), B.plain content)
@@ -331,7 +332,7 @@ bulletItem = try $ do
numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
numberedItem = try $ do
prefix <- many1 $ char '#'
- many1 $ char ' '
+ many $ char ' '
content <- listItemLine (length prefix)
return (LN Numbered (length prefix), B.plain content)
@@ -346,7 +347,7 @@ listItemLine nest = lineContent >>= parseContent
listContinuation = string (replicate nest '+') >> lineContent
parseContent x = do
parsed <- parseFromString (many1 inline) x
- return $ mconcat parsed
+ return $ mconcat $ dropWhileEnd (== B.space) parsed
-- Turn the CODE macro attributes into Pandoc code block attributes.
mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index bed49fd46..26dc934a9 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -198,7 +198,7 @@ para = try $ do
commentBlock :: T2T Blocks
commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment
--- Seperator and Strong line treated the same
+-- Separator and Strong line treated the same
hrule :: T2T Blocks
hrule = try $ do
spaces
@@ -575,8 +575,10 @@ symbol = B.str . (:[]) <$> oneOf specialChars
getTarget :: T2T String
getTarget = do
mv <- lookupMeta "target" . stateMeta <$> getState
- let MetaString target = fromMaybe (MetaString "html") mv
- return target
+ return $ case mv of
+ Just (MetaString target) -> target
+ Just (MetaInlines [Str target]) -> target
+ _ -> "html"
atStart :: T2T ()
atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 824a912c3..15f0d991f 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -429,9 +429,7 @@ tableRow = try $ do
s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
>> newline))
guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
- tr <- many tableCell
- many spaceChar >> char '\n'
- return tr
+ many tableCell <* many spaceChar <* char '\n'
tableCell :: PandocMonad m => VwParser m Blocks
tableCell = try $
@@ -451,13 +449,13 @@ ph s = try $ do
noHtmlPh :: PandocMonad m => VwParser m ()
noHtmlPh = try $
- () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
- >> lookAhead newline)
+ () <$ many spaceChar <* string "%nohtml" <* many spaceChar
+ <* lookAhead newline
templatePh :: PandocMonad m => VwParser m ()
templatePh = try $
- () <$ (many spaceChar >> string "%template" >>many (noneOf "\n")
- >> lookAhead newline)
+ () <$ many spaceChar <* string "%template" <* many (noneOf "\n")
+ <* lookAhead newline
-- inline parser
@@ -617,10 +615,8 @@ procImgurl :: String -> String
procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
inlineMath :: PandocMonad m => VwParser m Inlines
-inlineMath = try $ do
- char '$'
- contents <- many1Till (noneOf "\n") (char '$')
- return $ B.math contents
+inlineMath = try $
+ B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$')
tag :: PandocMonad m => VwParser m Inlines
tag = try $ do