diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-02-07 21:50:55 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-02-07 22:46:41 -0800 |
commit | 7a602d222f6ba005a901d6b3322f9392602d6b9d (patch) | |
tree | 0e520370ae14be0ec50b6a649c7f847318508841 /src | |
parent | 12aa83f70eb5d5fa9f472be2347b4cefa7975b62 (diff) | |
download | pandoc-7a602d222f6ba005a901d6b3322f9392602d6b9d.tar.gz |
Limit nesting of strong/emph.
This avoids exponential lookahead in parasitic cases, like
a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**.
Added stateMaxNestingLevel to ParserState.
We set this to 6, so you can still have Emph inside Emph, just not
indefinitely.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 16 |
2 files changed, 16 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0468ceec5..725621ce2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -629,6 +629,7 @@ data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed stateKeys :: KeyTable, -- ^ List of reference keys stateCitations :: [String], -- ^ List of available citations @@ -660,6 +661,7 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, stateCitations = [], diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 607d0971a..31db51028 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1041,8 +1041,20 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end +-- This is used to prevent exponential blowups for things like: +-- a**a*a**a*a**a*a**a*a**a*a**a*a** +nested :: GenParser Char ParserState a + -> GenParser Char ParserState a +nested p = do + nestlevel <- stateMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + emph :: GenParser Char ParserState Inline -emph = Emph `liftM` +emph = Emph `fmap` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar starEnd = notFollowedBy' strong >> char '*' @@ -1050,7 +1062,7 @@ emph = Emph `liftM` ulEnd = notFollowedBy' strong >> char '_' strong :: GenParser Char ParserState Inline -strong = Strong `liftM` +strong = Strong `liftM` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" |