aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs18
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs129
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs194
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs18
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs266
-rw-r--r--src/Text/Pandoc/Readers/RST.hs41
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs12
7 files changed, 337 insertions, 341 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b06e07a80..dab3d5db2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1069,11 +1069,11 @@ instance NamedTag (Tag String) where
getTagName _ = Nothing
isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t = isInlineTagName || isCommentTag t
- where isInlineTagName = case getTagName t of
- Just x -> x
- `Set.notMember` blockTags
- Nothing -> False
+isInlineTag t =
+ isCommentTag t || case getTagName t of
+ Nothing -> False
+ Just x -> x `Set.notMember` blockTags ||
+ T.take 1 x == "?" -- processing instr.
isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
isBlockTag t = isBlockTagName || isTagComment t
@@ -1208,8 +1208,10 @@ htmlTag f = try $ do
let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
let isName s = case s of
[] -> False
- ('?':_) -> True -- processing instruction
(c:cs) -> isLetter c && all isNameChar cs
+ let isPI s = case s of
+ ('?':_) -> True -- processing instruction
+ _ -> False
let endpos = if ln == 1
then setSourceColumn startpos
@@ -1225,7 +1227,7 @@ htmlTag f = try $ do
let handleTag tagname = do
-- basic sanity check, since the parser is very forgiving
-- and finds tags in stuff like x<y)
- guard $ isName tagname
+ guard $ isName tagname || isPI tagname
guard $ not $ null tagname
-- <https://example.org> should NOT be a tag either.
-- tagsoup will parse it as TagOpen "https:" [("example.org","")]
@@ -1245,7 +1247,7 @@ htmlTag f = try $ do
else return (next, "<!--" <> s <> "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
TagOpen tagname attr -> do
- guard $ all (isName . fst) attr
+ guard $ isPI tagname || all (isName . fst) attr
handleTag tagname
TagClose tagname ->
handleTag tagname
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7c5619165..26ac781db 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1308,13 +1308,7 @@ isBlockCommand s =
treatAsBlock :: Set.Set Text
treatAsBlock = Set.fromList
- [ "let", "def", "DeclareRobustCommand"
- , "newcommand", "renewcommand"
- , "newenvironment", "renewenvironment"
- , "providecommand", "provideenvironment"
- -- newcommand, etc. should be parsed by macroDef, but we need this
- -- here so these aren't parsed as inline commands to ignore
- , "special", "pdfannot", "pdfstringdef"
+ [ "special", "pdfannot", "pdfstringdef"
, "bibliographystyle"
, "maketitle", "makeindex", "makeglossary"
, "addcontentsline", "addtocontents", "addtocounter"
@@ -1375,6 +1369,7 @@ inline = (mempty <$ comment)
<|> (space <$ whitespace)
<|> (softbreak <$ endline)
<|> word
+ <|> macroDef
<|> inlineCommand'
<|> inlineEnvironment
<|> inlineGroup
@@ -1420,8 +1415,7 @@ end_ t = try (do
preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$ many preambleBlock
where preambleBlock = spaces1
- <|> void macroDef
- <|> void blockCommand
+ <|> void (macroDef <|> blockCommand)
<|> void braced
<|> (notFollowedBy (begin_ "document") >> void anyTok)
@@ -1484,9 +1478,9 @@ authors = try $ do
egroup
addMeta "author" (map trimInlines auths)
-macroDef :: PandocMonad m => LP m Blocks
+macroDef :: (Monoid a, PandocMonad m) => LP m a
macroDef =
- mempty <$ ((commandDef <|> environmentDef) <* doMacros 0)
+ mempty <$ (commandDef <|> environmentDef)
where commandDef = do
(name, macro') <- newcommand <|> letmacro <|> defmacro
guardDisabled Ext_latex_macros <|>
@@ -1506,21 +1500,28 @@ macroDef =
letmacro :: PandocMonad m => LP m (Text, Macro)
letmacro = do
controlSeq "let"
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- optional $ symbol '='
- spaces
- contents <- bracedOrToken
- return (name, Macro ExpandWhenDefined [] Nothing contents)
+ (name, contents) <- withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ optional $ symbol '='
+ spaces
+ -- we first parse in verbatim mode, and then expand macros,
+ -- because we don't want \let\foo\bar to turn into
+ -- \let\foo hello if we have previously \def\bar{hello}
+ contents <- bracedOrToken
+ return (name, contents)
+ contents' <- doMacros' 0 contents
+ return (name, Macro ExpandWhenDefined [] Nothing contents')
defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $ do
- controlSeq "def"
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- argspecs <- many (argspecArg <|> argspecPattern)
+defmacro = try $
-- we use withVerbatimMode, because macros are to be expanded
-- at point of use, not point of definition
- contents <- withVerbatimMode bracedOrToken
- return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+ withVerbatimMode $ do
+ controlSeq "def"
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ argspecs <- many (argspecArg <|> argspecPattern)
+ contents <- bracedOrToken
+ return (name, Macro ExpandWhenUsed argspecs Nothing contents)
argspecArg :: PandocMonad m => LP m ArgSpec
argspecArg = do
@@ -1530,8 +1531,8 @@ argspecArg = do
argspecPattern :: PandocMonad m => LP m ArgSpec
argspecPattern =
Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
- (toktype' == Symbol || toktype' == Word) &&
- (txt /= "{" && txt /= "\\" && txt /= "}")))
+ (toktype' == Symbol || toktype' == Word) &&
+ (txt /= "{" && txt /= "\\" && txt /= "}")))
newcommand :: PandocMonad m => LP m (Text, Macro)
newcommand = do
@@ -1540,22 +1541,24 @@ newcommand = do
controlSeq "renewcommand" <|>
controlSeq "providecommand" <|>
controlSeq "DeclareRobustCommand"
- optional $ symbol '*'
- Tok _ (CtrlSeq name) txt <- withVerbatimMode $ anyControlSeq <|>
- (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
- spaces
- numargs <- option 0 $ try bracketedNum
- let argspecs = map (\i -> ArgNum i) [1..numargs]
- spaces
- optarg <- option Nothing $ Just <$> try bracketedToks
- spaces
- contents <- withVerbatimMode bracedOrToken
- when (mtype == "newcommand") $ do
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
- Nothing -> return ()
- return (name, Macro ExpandWhenUsed argspecs optarg contents)
+ withVerbatimMode $ do
+ Tok _ (CtrlSeq name) txt <- do
+ optional (symbol '*')
+ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents <- bracedOrToken
+ when (mtype == "newcommand") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
+ Nothing -> return ()
+ return (name, Macro ExpandWhenUsed argspecs optarg contents)
newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
newenvironment = do
@@ -1563,24 +1566,23 @@ newenvironment = do
Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
controlSeq "renewenvironment" <|>
controlSeq "provideenvironment"
- optional $ symbol '*'
- spaces
- name <- untokenize <$> braced
- spaces
- numargs <- option 0 $ try bracketedNum
- let argspecs = map (\i -> ArgNum i) [1..numargs]
- spaces
- optarg <- option Nothing $ Just <$> try bracketedToks
- spaces
- startcontents <- withVerbatimMode bracedOrToken
- spaces
- endcontents <- withVerbatimMode bracedOrToken
- when (mtype == "newenvironment") $ do
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
- Nothing -> return ()
- return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
+ withVerbatimMode $ do
+ optional $ symbol '*'
+ spaces
+ name <- untokenize <$> braced
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
+ startcontents <- spaces >> bracedOrToken
+ endcontents <- spaces >> bracedOrToken
+ when (mtype == "newenvironment") $ do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
+ Nothing -> return ()
+ return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
Macro ExpandWhenUsed [] Nothing endcontents)
bracketedNum :: PandocMonad m => LP m Int
@@ -1644,7 +1646,9 @@ blockCommand = try $ do
let names = ordNub [name', name]
let rawDefiniteBlock = do
guard $ isBlockCommand name
- rawBlock "latex" <$> getRawCommand name (txt <> star)
+ rawcontents <- getRawCommand name (txt <> star)
+ (guardEnabled Ext_raw_tex >> return (rawBlock "latex" rawcontents))
+ <|> ignore rawcontents
-- heuristic: if it could be either block or inline, we
-- treat it if block if we have a sequence of block
-- commands followed by a newline. But we stop if we
@@ -1656,7 +1660,10 @@ blockCommand = try $ do
guard $ "start" `T.isPrefixOf` n
let rawMaybeBlock = try $ do
guard $ not $ isInlineCommand name
- curr <- rawBlock "latex" <$> getRawCommand name (txt <> star)
+ rawcontents <- getRawCommand name (txt <> star)
+ curr <- (guardEnabled Ext_raw_tex >>
+ return (rawBlock "latex" rawcontents))
+ <|> ignore rawcontents
rest <- many $ notFollowedBy startCommand *> blockCommand
lookAhead $ blankline <|> startCommand
return $ curr <> mconcat rest
@@ -1757,6 +1764,8 @@ blockCommands = M.fromList
, ("input", include "input")
, ("subfile", include "subfile")
, ("usepackage", include "usepackage")
+ -- preamble
+ , ("PackageError", mempty <$ (braced >> braced >> braced))
]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9256217fe..69bbf28d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -49,6 +49,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, toksToString
, satisfyTok
, doMacros
+ , doMacros'
, setpos
, anyControlSeq
, anySymbol
@@ -110,6 +111,8 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
import Text.Pandoc.Shared
import Text.Parsec.Pos
+-- import Debug.Trace (traceShowId)
+
newtype DottedNum = DottedNum [Int]
deriving (Show)
@@ -140,6 +143,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sLabels :: M.Map String [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map String Bool
+ , sExpanded :: Bool
}
deriving Show
@@ -161,6 +165,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
+ , sExpanded = False
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -211,10 +216,14 @@ type LP m = ParserT [Tok] LaTeXState m
withVerbatimMode :: PandocMonad m => LP m a -> LP m a
withVerbatimMode parser = do
- updateState $ \st -> st{ sVerbatimMode = True }
- result <- parser
- updateState $ \st -> st{ sVerbatimMode = False }
- return result
+ alreadyVerbatimMode <- sVerbatimMode <$> getState
+ if alreadyVerbatimMode
+ then parser
+ else do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
@@ -231,7 +240,7 @@ rawLaTeXParser retokenize parser valParser = do
Right toks' -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
- doMacros 0
+ doMacros
ts <- many (satisfyTok (const True))
setInput ts
rawparser)
@@ -246,8 +255,7 @@ rawLaTeXParser retokenize parser valParser = do
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> String -> ParserT String s m String
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = doMacros 0 *>
- (toksToString <$> many (satisfyTok (const True)))
+ do let retokenize = toksToString <$> many (satisfyTok (const True))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
@@ -255,6 +263,7 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
case res of
Left e -> fail (show e)
Right s' -> return s'
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@@ -368,10 +377,10 @@ toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
-satisfyTok f =
- try $ do
+satisfyTok f = do
+ doMacros -- apply macros on remaining input stream
res <- tokenPrim (T.unpack . untoken) updatePos matcher
- doMacros 0 -- apply macros on remaining input stream
+ updateState $ \st -> st{ sExpanded = False }
return res
where matcher t | f t = Just t
| otherwise = Nothing
@@ -379,82 +388,97 @@ satisfyTok f =
updatePos _spos _ (Tok pos _ _ : _) = pos
updatePos spos _ [] = incSourceColumn spos 1
-doMacros :: PandocMonad m => Int -> LP m ()
-doMacros n = do
+doMacros :: PandocMonad m => LP m ()
+doMacros = do
+ expanded <- sExpanded <$> getState
verbatimMode <- sVerbatimMode <$> getState
- unless verbatimMode $ do
- inp <- getInput
- case inp of
- Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos name ts
- Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos ("end" <> name) ts
- Tok _ (CtrlSeq "expandafter") _ : t : ts
- -> do setInput ts
- doMacros n
- getInput >>= setInput . combineTok t
- Tok spos (CtrlSeq name) _ : ts
- -> handleMacros spos name ts
- _ -> return ()
- where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
- | T.all isLetterOrAt w =
- Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
- where (x1, x2) = T.break isSpaceOrTab x
- combineTok t ts = t:ts
- handleMacros spos name ts = do
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Nothing -> return ()
- Just (Macro expansionPoint argspecs optarg newtoks) -> do
- setInput ts
- let matchTok (Tok _ toktype txt) =
- satisfyTok (\(Tok _ toktype' txt') ->
- toktype == toktype' &&
- txt == txt')
- let matchPattern toks = try $ mapM_ matchTok toks
- let getargs argmap [] = return argmap
- getargs argmap (Pattern toks : rest) = try $ do
- matchPattern toks
- getargs argmap rest
- getargs argmap (ArgNum i : Pattern toks : rest) =
- try $ do
- x <- mconcat <$> manyTill
- (braced <|> ((:[]) <$> anyTok))
- (matchPattern toks)
- getargs (M.insert i x argmap) rest
- getargs argmap (ArgNum i : rest) = do
- x <- try $ spaces >> bracedOrToken
- getargs (M.insert i x argmap) rest
- args <- case optarg of
- Nothing -> getargs M.empty argspecs
- Just o -> do
- x <- option o bracketedToks
- getargs (M.singleton 1 x) argspecs
- -- first boolean param is true if we're tokenizing
- -- an argument (in which case we don't want to
- -- expand #1 etc.)
- let addTok False (Tok _ (Arg i) _) acc =
- case M.lookup i args of
- Nothing -> mzero
- Just xs -> foldr (addTok True) acc xs
- -- see #4007
- addTok _ (Tok _ (CtrlSeq x) txt)
- acc@(Tok _ Word _ : _)
- | not (T.null txt) &&
- isLetter (T.last txt) =
- Tok spos (CtrlSeq x) (txt <> " ") : acc
- addTok _ t acc = setpos spos t : acc
- ts' <- getInput
- setInput $ foldr (addTok False) ts' newtoks
- case expansionPoint of
- ExpandWhenUsed ->
- if n > 20 -- detect macro expansion loops
- then throwError $ PandocMacroLoop (T.unpack name)
- else doMacros (n + 1)
- ExpandWhenDefined -> return ()
-
+ unless (expanded || verbatimMode) $ do
+ getInput >>= doMacros' 1 >>= setInput
+ updateState $ \st -> st{ sExpanded = True }
+
+doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
+doMacros' n inp = do
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros n spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros n spos ("end" <> name) ts
+ Tok _ (CtrlSeq "expandafter") _ : t : ts
+ -> combineTok t <$> doMacros' n ts
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros n spos name ts
+ _ -> return inp
+ <|> return inp
+
+ where
+ combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
+ | T.all isLetterOrAt w =
+ Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
+ where (x1, x2) = T.break isSpaceOrTab x
+ combineTok t ts = t:ts
+
+ matchTok (Tok _ toktype txt) =
+ satisfyTok (\(Tok _ toktype' txt') ->
+ toktype == toktype' &&
+ txt == txt')
+
+ matchPattern toks = try $ mapM_ matchTok toks
+
+ getargs argmap [] = return argmap
+ getargs argmap (Pattern toks : rest) = try $ do
+ matchPattern toks
+ getargs argmap rest
+ getargs argmap (ArgNum i : Pattern toks : rest) =
+ try $ do
+ x <- mconcat <$> manyTill (braced <|> ((:[]) <$> anyTok))
+ (matchPattern toks)
+ getargs (M.insert i x argmap) rest
+ getargs argmap (ArgNum i : rest) = do
+ x <- try $ spaces >> bracedOrToken
+ getargs (M.insert i x argmap) rest
+
+ addTok False args spos (Tok _ (Arg i) _) acc =
+ case M.lookup i args of
+ Nothing -> mzero
+ Just xs -> foldr (addTok True args spos) acc xs
+ -- see #4007
+ addTok _ _ spos (Tok _ (CtrlSeq x) txt)
+ acc@(Tok _ Word _ : _)
+ | not (T.null txt)
+ , isLetter (T.last txt) =
+ Tok spos (CtrlSeq x) (txt <> " ") : acc
+ addTok _ _ spos t acc = setpos spos t : acc
+
+ handleMacros n' spos name ts = do
+ when (n' > 20) -- detect macro expansion loops
+ $ throwError $ PandocMacroLoop (T.unpack name)
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> mzero
+ Just (Macro expansionPoint argspecs optarg newtoks) -> do
+ let getargs' = do
+ args <- case optarg of
+ Nothing -> getargs M.empty argspecs
+ Just o -> do
+ x <- option o bracketedToks
+ getargs (M.singleton 1 x) argspecs
+ rest <- getInput
+ return (args, rest)
+ lstate <- getState
+ res <- lift $ runParserT getargs' lstate "args" ts
+ case res of
+ Left _ -> fail $ "Could not parse arguments for " ++
+ T.unpack name
+ Right (args, rest) -> do
+ -- first boolean param is true if we're tokenizing
+ -- an argument (in which case we don't want to
+ -- expand #1 etc.)
+ let result = foldr (addTok False args spos) rest newtoks
+ case expansionPoint of
+ ExpandWhenUsed -> doMacros' (n' + 1) result
+ ExpandWhenDefined -> return result
setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d1ea7a1a5..5944ecf82 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1586,6 +1586,7 @@ symbol = do
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
+ updateLastStrPos
return $ return $ B.str [result]
-- parses inline code, between n `s and n `s
@@ -1632,7 +1633,7 @@ enclosure c = do
3 -> three c
2 -> two c mempty
1 -> one c mempty
- _ -> return (return $ B.str cs)
+ _ -> updateLastStrPos >> return (return $ B.str cs)
ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
ender c n = try $ do
@@ -1716,19 +1717,24 @@ nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
+ canRelocateSpace <- notAfterString
result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
if not (null result) && last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace <|> endline
- lookAhead alphaNum
+ -- ?? lookAhead alphaNum
+ -- replace space after with nonbreaking space
+ -- if softbreak, move before abbrev if possible (#4635)
return $ do
ils' <- ils
- if ils' == B.space
- then return (B.str result <> B.str "\160")
- else -- linebreak or softbreak
- return (ils' <> B.str result <> B.str "\160"))
+ case B.toList ils' of
+ [Space] ->
+ return (B.str result <> B.str "\160")
+ [SoftBreak] | canRelocateSpace ->
+ return (ils' <> B.str result <> B.str "\160")
+ _ -> return (B.str result <> ils'))
<|> return (return (B.str result))
else return (return (B.str result)))
<|> return (return (B.str result))
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 134598c07..6acc88b3d 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -44,7 +44,6 @@ import Control.Monad
import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
-import Data.Char (isAlphaNum)
import Data.Default
import Data.List (intercalate)
import Data.List.Split (splitOn)
@@ -59,8 +58,8 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F, enclosed)
-import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft)
+import Text.Pandoc.Parsing hiding (F)
+import Text.Pandoc.Shared (crFilter, underlineSpan)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -69,9 +68,9 @@ readMuse :: PandocMonad m
-> m Pandoc
readMuse opts s = do
let input = crFilter s
- res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def
+ res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
case res of
- Left e -> throwError e
+ Left e -> throwError $ PandocParsecError (unpack input) e
Right d -> return d
type F = Future MuseState
@@ -83,7 +82,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
, museNotes :: M.Map String (SourcePos, F Blocks)
- , museInPara :: Bool -- ^ True when looking for a paragraph terminator
}
instance Default MuseState where
@@ -94,15 +92,17 @@ instance Default MuseState where
, museLastStrPos = Nothing
, museLogMessages = []
, museNotes = M.empty
- , museInPara = False
}
data MuseEnv =
MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+ , museInPara :: Bool -- ^ True when parsing paragraph is not allowed
}
instance Default MuseEnv where
- def = MuseEnv { museInLink = False }
+ def = MuseEnv { museInLink = False
+ , museInPara = False
+ }
type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
@@ -131,18 +131,12 @@ parseMuse = do
many directive
blocks <- (:) <$> parseBlocks <*> many parseSection
st <- getState
- let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st
- reportLogMessages
- return doc
+ runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st <$ reportLogMessages
-- * Utility functions
commonPrefix :: String -> String -> String
-commonPrefix _ [] = []
-commonPrefix [] _ = []
-commonPrefix (x:xs) (y:ys)
- | x == y = x : commonPrefix xs ys
- | otherwise = []
+commonPrefix xs ys = map fst $ takeWhile (uncurry (==)) $ zip xs ys
-- | Trim up to one newline from the beginning of the string.
lchop :: String -> String
@@ -159,12 +153,11 @@ dropSpacePrefix lns =
where flns = filter (not . all (== ' ')) lns
maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
-atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
-atStart p = do
+atStart :: PandocMonad m => MuseParser m ()
+atStart = do
pos <- getPosition
st <- getState
guard $ museLastStrPos st /= Just pos
- p
firstColumn :: PandocMonad m => MuseParser m ()
firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
@@ -206,18 +199,16 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
classes = maybe [] words $ lookup "class" attrs
- keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"]
parseHtmlContent :: PandocMonad m
=> String -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
-parseHtmlContent tag = try $ do
- indent <- getIndent
- attr <- openTag tag
- manyTill spaceChar eol
- content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag
- manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
- return (htmlAttrToPandoc attr, content)
+parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
+ <$> fmap htmlAttrToPandoc (openTag tag)
+ <* manyTill spaceChar eol
+ <*> allowPara (parseBlocksTill (try $ indentWith indent *> closeTag tag))
+ <* manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
-- ** Directive parsers
@@ -250,6 +241,9 @@ directive = do
-- ** Block parsers
+allowPara :: MonadReader MuseEnv m => m a -> m a
+allowPara p = local (\s -> s { museInPara = False }) p
+
-- | Parse section contents until EOF or next header
parseBlocks :: PandocMonad m
=> MuseParser m (F Blocks)
@@ -263,10 +257,9 @@ parseBlocks =
nextSection = mempty <$ lookAhead headingStart
parseEnd = mempty <$ eof
blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock)
- <*> parseBlocks
- listStart = do
- updateState (\st -> st { museInPara = False })
- uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
+ <*> allowPara parseBlocks
+ listStart =
+ uncurry (B.<>) <$> allowPara (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
paraStart = do
indent <- length <$> many spaceChar
uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks
@@ -282,46 +275,36 @@ parseSection =
parseBlocksTill :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks)
-parseBlocksTill end =
- try (parseEnd <|>
- blockStart <|>
- listStart <|>
- paraStart)
+parseBlocksTill end = continuation
where
parseEnd = mempty <$ end
- blockStart = (B.<>) <$> blockElements <*> continuation
- listStart = do
- updateState (\st -> st { museInPara = False })
- uncurry (B.<>) <$> anyListUntil (parseEnd <|> continuation)
+ blockStart = (B.<>) <$> blockElements <*> allowPara continuation
+ listStart = uncurry (B.<>) <$> allowPara (anyListUntil (parseEnd <|> continuation))
paraStart = uncurry (B.<>) <$> paraUntil (parseEnd <|> continuation)
- continuation = parseBlocksTill end
+ continuation = try $ parseEnd <|> blockStart <|> listStart <|> paraStart
listItemContentsUntil :: PandocMonad m
=> Int
-> MuseParser m a
-> MuseParser m a
-> MuseParser m (F Blocks, a)
-listItemContentsUntil col pre end =
- try blockStart <|>
- try listStart <|>
- try paraStart
+listItemContentsUntil col pre end = p
where
+ p = try blockStart <|> try listStart <|> try paraStart
parsePre = (mempty,) <$> pre
parseEnd = (mempty,) <$> end
paraStart = do
(f, (r, e)) <- paraUntil (parsePre <|> continuation <|> parseEnd)
return (f B.<> r, e)
blockStart = first <$> ((B.<>) <$> blockElements)
- <*> (parsePre <|> continuation <|> parseEnd)
+ <*> allowPara (parsePre <|> continuation <|> parseEnd)
listStart = do
- updateState (\st -> st { museInPara = False })
- (f, (r, e)) <- anyListUntil (parsePre <|> continuation <|> parseEnd)
+ (f, (r, e)) <- allowPara $ anyListUntil (parsePre <|> continuation <|> parseEnd)
return (f B.<> r, e)
continuation = try $ do blank <- optionMaybe blankline
skipMany blankline
indentWith col
- updateState (\st -> st { museInPara = museInPara st && isNothing blank })
- listItemContentsUntil col pre end
+ local (\s -> s { museInPara = museInPara s && isNothing blank }) p
parseBlock :: PandocMonad m => MuseParser m (F Blocks)
parseBlock = do
@@ -331,25 +314,22 @@ parseBlock = do
where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
blockElements :: PandocMonad m => MuseParser m (F Blocks)
-blockElements = do
- updateState (\st -> st { museInPara = False })
- choice [ mempty <$ blankline
- , comment
- , separator
- , example
- , exampleTag
- , literalTag
- , centerTag
- , rightTag
- , quoteTag
- , divTag
- , biblioTag
- , playTag
- , verseTag
- , lineBlock
- , table
- , commentTag
- ]
+blockElements = (mempty <$ blankline)
+ <|> comment
+ <|> separator
+ <|> example
+ <|> exampleTag
+ <|> literalTag
+ <|> centerTag
+ <|> rightTag
+ <|> quoteTag
+ <|> divTag
+ <|> biblioTag
+ <|> playTag
+ <|> verseTag
+ <|> lineBlock
+ <|> table
+ <|> commentTag
-- | Parse a line comment, starting with @;@ in the first column.
comment :: PandocMonad m => MuseParser m (F Blocks)
@@ -445,9 +425,9 @@ divTag = do
-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@.
-- @\<biblio>@ tag is supported only in Text::Amuse mode.
biblioTag :: PandocMonad m => MuseParser m (F Blocks)
-biblioTag = do
- guardEnabled Ext_amuse
- fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio"
+biblioTag = fmap (B.divWith ("", ["biblio"], [])) . snd
+ <$ guardEnabled Ext_amuse
+ <*> parseHtmlContent "biblio"
-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@.
-- @\<play>@ tag is supported only in Text::Amuse mode.
@@ -463,13 +443,11 @@ verseLine = (<>)
-- | Parse @\<verse>@ tag.
verseTag :: PandocMonad m => MuseParser m (F Blocks)
-verseTag = try $ do
- indent <- getIndent
- openTag "verse"
- manyTill spaceChar eol
- content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse")
- manyTill spaceChar eol
- return $ B.lineBlock <$> content
+verseTag = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence
+ <$ openTag "verse"
+ <* manyTill spaceChar eol
+ <*> manyTill (indentWith indent *> verseLine) (try $ indentWith indent *> closeTag "verse")
+ <* manyTill spaceChar eol
-- | Parse @\<comment>@ tag.
commentTag :: PandocMonad m => MuseParser m (F Blocks)
@@ -483,19 +461,16 @@ commentTag = try $ mempty
paraContentsUntil :: PandocMonad m
=> MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Inlines, a)
-paraContentsUntil end = do
- updateState (\st -> st { museInPara = True })
- (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end)
- updateState (\st -> st { museInPara = False })
- return (trimInlinesF $ mconcat l, e)
+paraContentsUntil end = first (trimInlinesF . mconcat)
+ <$> someUntil inline (try (manyTill spaceChar eol *> local (\s -> s { museInPara = True}) end))
-- | Parse a paragraph.
paraUntil :: PandocMonad m
=> MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
paraUntil end = do
- state <- getState
- guard $ not $ museInPara state
+ inPara <- asks museInPara
+ guard $ not inPara
first (fmap B.para) <$> paraContentsUntil end
noteMarker :: PandocMonad m => MuseParser m String
@@ -504,6 +479,17 @@ noteMarker = try $ (:)
<*> oneOf "123456789"
<*> manyTill digit (char ']')
+addNote :: PandocMonad m
+ => String
+ -> SourcePos
+ -> F Blocks
+ -> MuseParser m ()
+addNote ref pos content = do
+ oldnotes <- museNotes <$> getState
+ when (M.member ref oldnotes)
+ (logMessage $ DuplicateNoteReference ref pos)
+ updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
+
-- Amusewiki version of note
-- Parsing is similar to list item, except that note marker is used instead of list marker
amuseNoteBlockUntil :: PandocMonad m
@@ -513,12 +499,8 @@ amuseNoteBlockUntil end = try $ do
guardEnabled Ext_amuse
ref <- noteMarker <* spaceChar
pos <- getPosition
- updateState (\st -> st { museInPara = False })
- (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end
- oldnotes <- museNotes <$> getState
- when (M.member ref oldnotes)
- (logMessage $ DuplicateNoteReference ref pos)
- updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
+ (content, e) <- allowPara $ listItemContentsUntil (sourceColumn pos - 1) (fail "x") end
+ addNote ref pos content
return (mempty, e)
-- Emacs version of note
@@ -526,13 +508,10 @@ amuseNoteBlockUntil end = try $ do
emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks)
emacsNoteBlock = try $ do
guardDisabled Ext_amuse
- pos <- getPosition
ref <- noteMarker <* skipSpaces
- content <- mconcat <$> blocksTillNote
- oldnotes <- museNotes <$> getState
- when (M.member ref oldnotes)
- (logMessage $ DuplicateNoteReference ref pos)
- updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
+ pos <- getPosition
+ content <- fmap mconcat blocksTillNote
+ addNote ref pos content
return mempty
where
blocksTillNote =
@@ -544,10 +523,8 @@ emacsNoteBlock = try $ do
-- | Parse a line block indicated by @\'>\'@ characters.
lineBlock :: PandocMonad m => MuseParser m (F Blocks)
-lineBlock = try $ do
- indent <- getIndent
- lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
- return $ B.lineBlock <$> sequence lns
+lineBlock = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence
+ <$> (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
where
blankVerseLine = try $ mempty <$ char '>' <* blankline
nonblankVerseLine = try (string "> ") *> verseLine
@@ -561,8 +538,7 @@ bulletListItemsUntil :: PandocMonad m
bulletListItemsUntil indent end = try $ do
char '-'
void spaceChar <|> lookAhead eol
- updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- allowPara $ listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
return (x:xs, e)
-- | Parse a bullet list.
@@ -598,8 +574,7 @@ orderedListItemsUntil indent style end =
continuation = try $ do
pos <- getPosition
void spaceChar <|> lookAhead eol
- updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
+ (x, (xs, e)) <- allowPara $ listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
return (x:xs, e)
-- | Parse an ordered list.
@@ -620,8 +595,7 @@ descriptionsUntil :: PandocMonad m
-> MuseParser m ([F Blocks], a)
descriptionsUntil indent end = do
void spaceChar <|> lookAhead eol
- updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- allowPara $ listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
return (x:xs, e)
definitionListItemsUntil :: PandocMonad m
@@ -686,12 +660,8 @@ museAppendElement element tbl =
MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl }
MuseCaption inlines -> tbl{ museTableCaption = inlines }
-tableCell :: PandocMonad m => MuseParser m (F Blocks)
-tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
- where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol
-
tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
-tableElements = sequence <$> (tableParseElement `sepEndBy1` eol)
+tableElements = sequence <$> many1 tableParseElement
elementsToTable :: [MuseTableElement] -> MuseTable
elementsToTable = foldr museAppendElement emptyTable
@@ -710,10 +680,10 @@ tableParseElement = tableParseHeader
tableParseRow :: PandocMonad m
=> Int -- ^ Number of separator characters
-> MuseParser m (F [Blocks])
-tableParseRow n = try $
- sequence <$> (tableCell `sepBy2` fieldSep)
- where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p)
- fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline))
+tableParseRow n = try $ sequence <$> tableCells
+ where tableCells = (:) <$> tableCell sep <*> (tableCells <|> fmap pure (tableCell eol))
+ tableCell p = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline' p
+ sep = try $ many1 spaceChar *> count n (char '|') *> lookAhead (void (many1 spaceChar) <|> void eol)
-- | Parse a table header row.
tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
@@ -732,7 +702,7 @@ tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat
<$ many spaceChar
<* string "|+"
- <*> many1Till inline (try $ string "+|")
+ <*> many1Till inline (try $ string "+|" *> eol)
-- ** Inline parsers
@@ -803,24 +773,15 @@ whitespace = try $ pure B.space <$ skipMany1 spaceChar
br :: PandocMonad m => MuseParser m (F Inlines)
br = try $ pure B.linebreak <$ string "<br>"
-emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
-emphasisBetween c = try $ enclosedInlines c c
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
- -> ParserT s st m end -- ^ end parser
- -> ParserT s st m a -- ^ content parser (to be used repeatedly)
- -> ParserT s st m [a]
-enclosed start end parser = try $
- start *> notFollowedBy spaceChar *> many1Till parser end
-
-enclosedInlines :: (PandocMonad m, Show a, Show b)
+emphasisBetween :: (PandocMonad m, Show a)
=> MuseParser m a
- -> MuseParser m b
-> MuseParser m (F Inlines)
-enclosedInlines start end = try $ trimInlinesF . mconcat
- <$> enclosed (atStart start) end inline
- <* notFollowedBy (satisfy isAlphaNum)
+emphasisBetween p = try $ trimInlinesF . mconcat
+ <$ atStart
+ <* p
+ <* notFollowedBy spaceChar
+ <*> many1Till inline p
+ <* notFollowedBy alphaNum
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
@@ -875,8 +836,7 @@ verbatimTag = return . B.text
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
classes <- maybe [] words . lookup "name" <$> openTag "class"
- res <- manyTill inline $ closeTag "class"
- return $ B.spanWith ("", classes, []) <$> mconcat res
+ fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class")
-- | Parse "~~" as nonbreaking space.
nbsp :: PandocMonad m => MuseParser m (F Inlines)
@@ -884,14 +844,12 @@ nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
-code = try $ do
- atStart $ char '='
- contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '='
- guard $ not $ null contents
- guard $ head contents `notElem` " \t\n"
- guard $ last contents `notElem` " \t\n"
- notFollowedBy $ satisfy isAlphaNum
- return $ return $ B.code contents
+code = try $ fmap pure $ B.code . uncurry (++)
+ <$ atStart
+ <* char '='
+ <* notFollowedBy (spaceChar <|> newline)
+ <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=')
+ <* notFollowedBy alphaNum
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
@@ -916,24 +874,24 @@ str :: PandocMonad m => MuseParser m (F Inlines)
str = return . B.str <$> many1 alphaNum <* updateLastStrPos
symbol :: PandocMonad m => MuseParser m (F Inlines)
-symbol = return . B.str <$> count 1 nonspaceChar
+symbol = pure . B.str . pure <$> nonspaceChar
-- | Parse a link or image.
linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
linkOrImage = try $ do
inLink <- asks museInLink
guard $ not inLink
- local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link)
+ local (\s -> s { museInLink = True }) (link "URL:" <|> image <|> link "")
linkContent :: PandocMonad m => MuseParser m (F Inlines)
linkContent = trimInlinesF . mconcat
<$ char '['
<*> manyTill inline (char ']')
--- | Parse a link starting with @URL:@
-explicitLink :: PandocMonad m => MuseParser m (F Inlines)
-explicitLink = try $ do
- string "[[URL:"
+-- | Parse a link starting with (possibly null) prefix
+link :: PandocMonad m => String -> MuseParser m (F Inlines)
+link prefix = try $ do
+ string $ "[[" ++ prefix
url <- manyTill anyChar $ char ']'
content <- option (pure $ B.str url) linkContent
char ']'
@@ -966,11 +924,3 @@ image = try $ do
<*> optionMaybe (many1 digit)
<* many spaceChar
<*> optionMaybe (oneOf "rlf")
-
-link :: PandocMonad m => MuseParser m (F Inlines)
-link = try $ do
- string "[["
- url <- manyTill anyChar $ char ']'
- content <- optionMaybe linkContent
- char ']'
- return $ B.link url "" <$> fromMaybe (return $ B.str url) content
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 28fa7b83e..1938ca171 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -665,11 +665,13 @@ directive' = do
optional blanklines
let body' = body ++ "\n\n"
name = trim $ fromMaybe "" (lookup "name" fields)
- imgAttr cl = ("", classes, widthAttr ++ heightAttr)
+ classes = words $ maybe "" trim (lookup "class" fields)
+ keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"]
+ imgAttr cl = ("", classes ++ alignClasses, widthAttr ++ heightAttr)
where
- classes = words $ maybe "" trim (lookup cl fields) ++
- maybe "" (\x -> "align-" ++ trim x)
- (lookup "align" fields)
+ alignClasses = words $ maybe "" trim (lookup cl fields) ++
+ maybe "" (\x -> "align-" ++ trim x)
+ (lookup "align" fields)
scale = case trim <$> lookup "scale" fields of
Just v -> case reverse v of
'%':vv ->
@@ -698,8 +700,9 @@ directive' = do
"line-block" -> lineBlockDirective body'
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
"role" -> addNewRole top $ map (second trim) fields
- "container" -> B.divWith (name, "container" : words top, []) <$>
- parseFromString' parseBlocks body'
+ "container" -> B.divWith
+ (name, "container" : words top ++ classes, []) <$>
+ parseFromString' parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
parseInlineFromString (trim top)
"unicode" -> B.para <$> -- consumed by substKey
@@ -717,7 +720,7 @@ directive' = do
(l:ls) -> B.divWith ("",["admonition-title"],[])
(B.para (B.str (toUpper l : ls)))
[] -> mempty
- return $ B.divWith ("",[label],[]) (lab <> bod)
+ return $ B.divWith (name,label:classes,keyvals) (lab <> bod)
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
tit <- B.para . B.strong <$> parseInlineFromString
@@ -725,21 +728,21 @@ directive' = do
then ""
else (": " ++ subtit))
bod <- parseFromString' parseBlocks body'
- return $ B.divWith ("",["sidebar"],[]) $ tit <> bod
+ return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod
"topic" ->
do tit <- B.para . B.strong <$> parseInlineFromString top
bod <- parseFromString' parseBlocks body'
- return $ B.divWith ("",["topic"],[]) $ tit <> bod
+ return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
s { stateRstDefaultRole =
case trim top of
"" -> stateRstDefaultRole def
role -> role })
x | x == "code" || x == "code-block" ->
- codeblock (words $ fromMaybe [] $ lookup "class" fields)
+ codeblock name classes
(lookup "number-lines" fields) (trim top) body
"aafig" -> do
- let attribs = ("", ["aafig"], map (second trimr) fields)
+ let attribs = (name, ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
"math" -> return $ B.para $ mconcat $ map B.displayMath
$ toChunks $ top ++ "\n\n" ++ body
@@ -758,8 +761,7 @@ directive' = do
$ B.imageWith attr src "" alt
Nothing -> B.imageWith attr src "" alt
"class" -> do
- let attrs = ("", splitBy isSpace $ trim top,
- map (second trimr) fields)
+ let attrs = (name, words (trim top), map (second trimr) fields)
-- directive content or the first immediately following element
children <- case body of
"" -> block
@@ -769,7 +771,7 @@ directive' = do
pos <- getPosition
logMessage $ SkippedContent (".. " ++ other) pos
bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
- return $ B.divWith ("",[other],[]) bod
+ return $ B.divWith (name, other:classes, keyvals) bod
tableDirective :: PandocMonad m
=> String -> [(String, String)] -> String -> RSTParser m Blocks
@@ -989,10 +991,11 @@ toChunks = dropWhile null
then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}"
else s
-codeblock :: [String] -> Maybe String -> String -> String -> RSTParser m Blocks
-codeblock classes numberLines lang body =
+codeblock :: String -> [String] -> Maybe String -> String -> String
+ -> RSTParser m Blocks
+codeblock ident classes numberLines lang body =
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
- where attribs = ("", classes', kvs)
+ where attribs = (ident, classes', kvs)
classes' = "sourceCode" : lang
: maybe [] (const ["numberLines"]) numberLines
++ classes
@@ -1266,7 +1269,9 @@ simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
else simpleTableSep '=' >> anyLine
- dashes <- simpleDashedLines '=' <|> simpleDashedLines '-'
+ dashes <- if headless
+ then simpleDashedLines '='
+ else simpleDashedLines '=' <|> simpleDashedLines '-'
newline
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 15f0d991f..6fdbcb50e 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -85,12 +85,12 @@ import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code,
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition (Attr, Block (BulletList, OrderedList),
Inline (Space), ListNumberDelim (..),
- ListNumberStyle (..), Meta, Pandoc (..),
+ ListNumberStyle (..), Pandoc (..),
nullMeta)
import Text.Pandoc.Options (ReaderOptions)
-import Text.Pandoc.Parsing (F, ParserState, ParserT, blanklines, emailAddress,
+import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
- registerHeader, runF, spaceChar, stateMeta',
+ registerHeader, spaceChar, stateMeta,
stateOptions, uri)
import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast)
import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
@@ -126,7 +126,7 @@ parseVimwiki = do
spaces
eof
st <- getState
- let meta = runF (stateMeta' st) st
+ let meta = stateMeta st
return $ Pandoc meta (toList bs)
-- block parser
@@ -444,8 +444,8 @@ ph s = try $ do
many spaceChar >>string ('%':s) >> spaceChar
contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline)
--use lookAhead because of placeholder in the whitespace parser
- let meta' = return $ B.setMeta s contents nullMeta :: F Meta
- updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' }
+ let meta' = B.setMeta s contents nullMeta
+ updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
noHtmlPh :: PandocMonad m => VwParser m ()
noHtmlPh = try $