aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2012-02-07 21:50:55 -0800
committer John MacFarlane <fiddlosopher@gmail.com> 2012-02-07 22:46:41 -0800
commit 7a602d222f6ba005a901d6b3322f9392602d6b9d (patch)
tree 0e520370ae14be0ec50b6a649c7f847318508841
parent 12aa83f70eb5d5fa9f472be2347b4cefa7975b62 (diff)
download pandoc-7a602d222f6ba005a901d6b3322f9392602d6b9d.tar.gz
Limit nesting of strong/emph.
This avoids exponential lookahead in pathological cases, like a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**. Added stateMaxNestingLevel to ParserState. We set this to 6, so you can still have Emph inside Emph, just not indefinitely.
-rw-r--r-- src/Text/Pandoc/Parsing.hs 2
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 16
2 files changed, 16 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0468ceec5..725621ce2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -629,6 +629,7 @@ data ParserState = ParserState
{ stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
stateParserContext :: ParserContext, -- ^ Inside list?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
stateKeys :: KeyTable, -- ^ List of reference keys
stateCitations :: [String], -- ^ List of available citations
@@ -660,6 +661,7 @@ defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
+ stateMaxNestingLevel = 6,
stateLastStrPos = Nothing,
stateKeys = M.empty,
stateCitations = [],
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 607d0971a..31db51028 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1041,8 +1041,20 @@ inlinesBetween start end =
where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: GenParser Char ParserState a
+ -> GenParser Char ParserState a
+nested p = do
+ nestlevel <- stateMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
emph :: GenParser Char ParserState Inline
-emph = Emph `liftM`
+emph = Emph `fmap` nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = char '*' >> lookAhead nonspaceChar
starEnd = notFollowedBy' strong >> char '*'
@@ -1050,7 +1062,7 @@ emph = Emph `liftM`
ulEnd = notFollowedBy' strong >> char '_'
strong :: GenParser Char ParserState Inline
-strong = Strong `liftM`
+strong = Strong `liftM` nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = string "**" >> lookAhead nonspaceChar
starEnd = try $ string "**"