aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2012-09-22 13:59:30 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2012-09-22 14:05:09 -0700
commit: 22bd34fa338edc5fb38cce5adfa11ffbc451b0d4 (patch)
tree: 3598a3c76d5d788336a13021e6ef5ddf7d6b8950 /src
parent: 1038606036a800b8edfd54f96cb8841bf7998872 (diff)
download: pandoc-22bd34fa338edc5fb38cce5adfa11ffbc451b0d4.tar.gz
Markdown reader: Fixed link parser to avoid exponential slowdowns.
Previously the parser would hang on input like this: `[[[[[[[[[[[[[[[[[[hi`. We fixed this by making the link parser parse characters between balanced brackets (skipping brackets in inline code spans), then parsing the result as an inline list. One change is that `[hi *there]* bud](/url)` is no longer parsed as a link. But in this respect pandoc behaved differently from most other implementations anyway, so that seems okay. All current tests pass. Added test for this case. Closes #620.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Readers/Markdown.hs 20
1 files changed, 12 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1c2cc12f1..d69348e30 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -132,15 +132,19 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines)
-inlinesInBalancedBrackets = try $ do
+inlinesInBalancedBrackets = charsInBalancedBrackets >>=
+ parseFromString (trimInlinesF . mconcat <$> many inline)
+
+charsInBalancedBrackets :: Parser [Char] ParserState [Char]
+charsInBalancedBrackets = do
char '['
- result <- manyTill ( (do lookAhead $ try $ do x <- inline
- guard (runF x def == B.str "[")
- bal <- inlinesInBalancedBrackets
- return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal)
- <|> inline)
- (char ']')
- return $ mconcat result
+ result <- manyTill ( many1 (noneOf "`[]\n")
+ <|> (snd <$> withRaw code)
+ <|> ((\xs -> '[' : xs ++ "]") <$> charsInBalancedBrackets)
+ <|> count 1 (satisfy (/='\n'))
+ <|> (newline >> notFollowedBy blankline >> return "\n")
+ ) (char ']')
+ return $ concat result
--
-- document structure