From 7a602d222f6ba005a901d6b3322f9392602d6b9d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Feb 2012 21:50:55 -0800 Subject: Limit nesting of strong/emph. This avoids exponential lookahead in parasitic cases, like a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**. Added stateMaxNestingLevel to ParserState. We set this to 6, so you can still have Emph inside Emph, just not indefinitely. --- src/Text/Pandoc/Parsing.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0468ceec5..725621ce2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -629,6 +629,7 @@ data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed stateKeys :: KeyTable, -- ^ List of reference keys stateCitations :: [String], -- ^ List of available citations @@ -660,6 +661,7 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, stateCitations = [], diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 607d0971a..31db51028 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1041,8 +1041,20 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end +-- This is used to prevent exponential blowups for things like: +-- a**a*a**a*a**a*a**a*a**a*a**a*a** +nested :: GenParser Char ParserState a + -> GenParser Char ParserState a +nested p = do + nestlevel <- stateMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + emph :: GenParser Char ParserState Inline -emph = Emph `liftM` +emph = Emph `fmap` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar starEnd = notFollowedBy' strong >> char '*' @@ -1050,7 +1062,7 @@ emph = Emph `liftM` ulEnd = notFollowedBy' strong >> char '_' strong :: GenParser Char ParserState Inline -strong = Strong `liftM` +strong = Strong `liftM` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" -- cgit v1.2.3