aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-06 18:41:01 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-06 18:41:01 +0000
commit1645fb65e4a486de95f5375bcc029a139d4d8c45 (patch)
treeed7fcc3e9634f9586c5b9da8acfdd0a0fc116e47
parentbb8478e4e24b431ca81ee7f669d517eb11a47500 (diff)
downloadpandoc-1645fb65e4a486de95f5375bcc029a139d4d8c45.tar.gz
Fixed serious performance problems with new Markdown reader:
Instead of using lookahead to determine whether a single quote is an apostrophe, we now use state. Inside single quotes, a ' character won't be recognized as the beginning of a single quote. 'stateQuoteContext' has been added to keep track of this. git-svn-id: https://pandoc.googlecode.com/svn/trunk@437 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs48
-rw-r--r--src/Text/Pandoc/Shared.hs9
2 files changed, 44 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7fab2ad01..35ceb7807 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -594,24 +594,46 @@ apostrophe = do
quoted = do
doubleQuoted <|> singleQuoted
-singleQuoted = try (do
- result <- enclosed singleQuoteStart singleQuoteEnd
- (do{notFollowedBy' singleQuoted; inline} <|> apostrophe)
- return $ Quoted SingleQuote $ normalizeSpaces result)
-
-doubleQuoted = try (do
- result <- enclosed doubleQuoteStart doubleQuoteEnd inline
- return $ Quoted DoubleQuote $ normalizeSpaces result)
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ do
+ notFollowedBy space
+ result <- many1Till inline singleQuoteEnd
+ return $ Quoted SingleQuote $ normalizeSpaces result
+
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ do
+ notFollowedBy space
+ result <- many1Till inline doubleQuoteEnd
+ return $ Quoted DoubleQuote $ normalizeSpaces result
+
+failIfInQuoteContext context = do
+ st <- getState
+ if (stateQuoteContext st == context)
+ then fail "already inside quotes"
+ else return ()
-singleQuoteStart = try (do
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
char '\'' <|> char '\8216'
- notFollowedBy' whitespace)
singleQuoteEnd = try (do
- oneOfStrings ["'", "\8217"]
+ char '\'' <|> char '\8217'
notFollowedBy alphaNum)
-doubleQuoteStart = char '"' <|> char '\8220'
+doubleQuoteStart = do
+ failIfInQuoteContext InDoubleQuote
+ char '"' <|> char '\8220'
doubleQuoteEnd = char '"' <|> char '\8221'
@@ -623,7 +645,7 @@ dash = enDash <|> emDash
enDash = try (do
char '-'
- followedBy' (many1 digit)
+ notFollowedBy (noneOf "0123456789")
return EnDash)
emDash = try (do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 91b44e6bf..624f573de 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -47,6 +47,7 @@ module Text.Pandoc.Shared (
testStringWith,
HeaderType (..),
ParserContext (..),
+ QuoteContext (..),
ParserState (..),
defaultParserState,
-- * Native format prettyprinting
@@ -108,10 +109,17 @@ data ParserContext
| NullState -- ^ Default state
deriving (Eq, Show)
+data QuoteContext
+ = InSingleQuote -- ^ Used when we're parsing inside single quotes
+ | InDoubleQuote -- ^ Used when we're parsing inside double quotes
+ | NoQuote -- ^ Used when we're not parsing inside quotes
+ deriving (Eq, Show)
+
data ParserState = ParserState
{ stateParseRaw :: Bool, -- ^ Parse untranslatable HTML
-- and LaTeX?
stateParserContext :: ParserContext, -- ^ What are we parsing?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
stateKeyBlocks :: [Block], -- ^ List of reference key blocks
stateKeysUsed :: [[Inline]], -- ^ List of references used
stateNoteBlocks :: [Block], -- ^ List of note blocks
@@ -134,6 +142,7 @@ defaultParserState :: ParserState
defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
stateKeyBlocks = [],
stateKeysUsed = [],
stateNoteBlocks = [],