diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 18:41:01 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-06 18:41:01 +0000 |
commit | 1645fb65e4a486de95f5375bcc029a139d4d8c45 (patch) | |
tree | ed7fcc3e9634f9586c5b9da8acfdd0a0fc116e47 | |
parent | bb8478e4e24b431ca81ee7f669d517eb11a47500 (diff) | |
download | pandoc-1645fb65e4a486de95f5375bcc029a139d4d8c45.tar.gz |
Fixed serious performance problems with new Markdown reader:
Instead of using lookahead to determine whether a single quote
is an apostrophe, we now use state. Inside single quotes,
a ' character won't be recognized as the beginning of a single
quote. 'stateQuoteContext' has been added to keep track of
this.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@437 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 48 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 9 |
2 files changed, 44 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7fab2ad01..35ceb7807 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -594,24 +594,46 @@ apostrophe = do quoted = do doubleQuoted <|> singleQuoted -singleQuoted = try (do - result <- enclosed singleQuoteStart singleQuoteEnd - (do{notFollowedBy' singleQuoted; inline} <|> apostrophe) - return $ Quoted SingleQuote $ normalizeSpaces result) - -doubleQuoted = try (do - result <- enclosed doubleQuoteStart doubleQuoteEnd inline - return $ Quoted DoubleQuote $ normalizeSpaces result) +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ do + notFollowedBy space + result <- many1Till inline singleQuoteEnd + return $ Quoted SingleQuote $ normalizeSpaces result + +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ do + notFollowedBy space + result <- many1Till inline doubleQuoteEnd + return $ Quoted DoubleQuote $ normalizeSpaces result + +failIfInQuoteContext context = do + st <- getState + if (stateQuoteContext st == context) + then fail "already inside quotes" + else return () -singleQuoteStart = try (do +singleQuoteStart = do + failIfInQuoteContext InSingleQuote char '\'' <|> char '\8216' - notFollowedBy' whitespace) singleQuoteEnd = try (do - oneOfStrings ["'", "\8217"] + char '\'' <|> char '\8217' notFollowedBy alphaNum) -doubleQuoteStart = char '"' <|> char '\8220' +doubleQuoteStart = do + failIfInQuoteContext InDoubleQuote + char '"' <|> char '\8220' doubleQuoteEnd = char '"' <|> char '\8221' @@ -623,7 +645,7 @@ dash = enDash <|> emDash enDash = try (do char '-' - followedBy' (many1 digit) + notFollowedBy (noneOf "0123456789") return EnDash) emDash = try (do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 91b44e6bf..624f573de 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Shared ( testStringWith, HeaderType (..), ParserContext (..), + QuoteContext (..), ParserState (..), defaultParserState, -- * Native format prettyprinting @@ -108,10 +109,17 @@ data ParserContext | NullState -- ^ Default state deriving (Eq, Show) +data QuoteContext + = InSingleQuote -- ^ Used when we're parsing inside single quotes + | InDoubleQuote -- ^ Used when we're parsing inside double quotes + | NoQuote -- ^ Used when we're not parsing inside quotes + deriving (Eq, Show) + data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML -- and LaTeX? stateParserContext :: ParserContext, -- ^ What are we parsing? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateKeyBlocks :: [Block], -- ^ List of reference key blocks stateKeysUsed :: [[Inline]], -- ^ List of references used stateNoteBlocks :: [Block], -- ^ List of note blocks @@ -134,6 +142,7 @@ defaultParserState :: ParserState defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, + stateQuoteContext = NoQuote, stateKeyBlocks = [], stateKeysUsed = [], stateNoteBlocks = [], |