From c66921f2acea456af527b93e2daa1d8594798642 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 6 Dec 2010 22:11:27 -0800 Subject: Markdown reader: better handling of intraword _. The 'str' parser now reads internal _'s as part of the string. This prevents pandoc from getting started looking for an emphasized block, which can cause exponential slowdowns in some cases. Resolves Issue #182. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 90e8e19bb..ad7d2a0cc 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, htmlBlockElement, htmlComment, unsanitaryURI ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, unless, guard) +import Control.Monad (when, liftM, guard) import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition) -- | Read markdown from an input string and return a Pandoc document. @@ -121,7 +121,7 @@ inlinesInBalancedBrackets :: GenParser Char ParserState Inline inlinesInBalancedBrackets parser = try $ do char '[' result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - unless (res == "[") pzero + guard (res == "[") bal <- inlinesInBalancedBrackets parser return $ [Str "["] ++ bal ++ [Str "]"]) <|> (count 1 parser)) @@ -1138,7 +1138,9 @@ strChar = noneOf (specialChars ++ " \t\n") str :: GenParser Char ParserState Inline str = do - result <- many1 strChar + a <- strChar + as <- many (strChar <|> (try $ char '_' >>~ lookAhead strChar)) + let result = a:as state <- getState let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) if stateSmart state -- cgit v1.2.3