diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 18:41:01 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 18:41:01 +0000 |
commit | 1645fb65e4a486de95f5375bcc029a139d4d8c45 (patch) | |
tree | ed7fcc3e9634f9586c5b9da8acfdd0a0fc116e47 /src/Text/Pandoc/Readers | |
parent | bb8478e4e24b431ca81ee7f669d517eb11a47500 (diff) | |
download | pandoc-1645fb65e4a486de95f5375bcc029a139d4d8c45.tar.gz |
Fixed serious performance problems with new Markdown reader:
Instead of using lookahead to determine whether a single quote
is an apostrophe, we now use parser state. Inside a single-quoted
span, a ' character won't be recognized as the beginning of another
(nested) single-quoted span; the same check is applied to double
quotes. A new state field, 'stateQuoteContext', has been added to
keep track of which kind of quotation the parser is currently inside.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@437 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 48 |
1 files changed, 35 insertions, 13 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7fab2ad01..35ceb7807 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -594,24 +594,46 @@ apostrophe = do
 quoted = do
   doubleQuoted <|> singleQuoted
 
-singleQuoted = try (do
-  result <- enclosed singleQuoteStart singleQuoteEnd
-            (do{notFollowedBy' singleQuoted; inline} <|> apostrophe)
-  return $ Quoted SingleQuote $ normalizeSpaces result)
-
-doubleQuoted = try (do
-  result <- enclosed doubleQuoteStart doubleQuoteEnd inline
-  return $ Quoted DoubleQuote $ normalizeSpaces result)
+withQuoteContext context parser = do
+  oldState <- getState
+  let oldQuoteContext = stateQuoteContext oldState
+  setState oldState { stateQuoteContext = context }
+  result <- parser
+  newState <- getState
+  setState newState { stateQuoteContext = oldQuoteContext }
+  return result
+
+singleQuoted = try $ do
+  singleQuoteStart
+  withQuoteContext InSingleQuote $ do
+    notFollowedBy space
+    result <- many1Till inline singleQuoteEnd
+    return $ Quoted SingleQuote $ normalizeSpaces result
+
+doubleQuoted = try $ do
+  doubleQuoteStart
+  withQuoteContext InDoubleQuote $ do
+    notFollowedBy space
+    result <- many1Till inline doubleQuoteEnd
+    return $ Quoted DoubleQuote $ normalizeSpaces result
+
+failIfInQuoteContext context = do
+  st <- getState
+  if (stateQuoteContext st == context)
+    then fail "already inside quotes"
+    else return ()
 
-singleQuoteStart = try (do
+singleQuoteStart = do
+  failIfInQuoteContext InSingleQuote
   char '\'' <|> char '\8216'
-  notFollowedBy' whitespace)
 
 singleQuoteEnd = try (do
-  oneOfStrings ["'", "\8217"]
+  char '\'' <|> char '\8217'
   notFollowedBy alphaNum)
 
-doubleQuoteStart = char '"' <|> char '\8220'
+doubleQuoteStart = do
+  failIfInQuoteContext InDoubleQuote
+  char '"' <|> char '\8220'
 
 doubleQuoteEnd = char '"' <|> char '\8221'
 
@@ -623,7 +645,7 @@ dash = enDash <|> emDash
 
 enDash = try (do
   char '-'
-  followedBy' (many1 digit)
+  notFollowedBy (noneOf "0123456789")
   return EnDash)
 
 emDash = try (do
```