diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2009-11-28 03:22:18 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2009-11-28 03:22:18 +0000 |
commit | d1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3 (patch) | |
tree | 2d27c04cfccdd5f6606219c1277428ed20cbefc7 /src/Text/Pandoc/Readers | |
parent | 1d440130c431405f7edbd7d8beae584101debbb6 (diff) | |
download | pandoc-d1b80f8f350c7588ba2c95f2c4a646f7af5a5cb3.tar.gz |
Markdown reader: parse refs and notes in the same pass.
Previously the markdown reader made one pass for references,
a second pass for notes (which it parsed and stored in the
parser state), and a third pass for the rest. This patch
achieves a 10% speed improvement by storing the raw notes
on the first (reference) pass, then parsing them when the
notes are inserted into the AST. This eliminates the need
for a second pass to parse notes.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1629 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 33 |
1 file changed, 13 insertions, 20 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 226252381..7a16f1578 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -164,23 +164,18 @@ parseMarkdown = do
   -- markdown allows raw HTML
   updateState (\state -> state { stateParseRaw = True })
   startPos <- getPosition
-  -- go through once just to get list of reference keys
-  -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
-                  return . concat
+  -- go through once just to get list of reference keys and notes
+  -- docMinusKeys is the raw document with blanks where the keys/notes were...
+  st <- getState
+  let firstPassParser = referenceKey
+                        <|> (if stateStrict st then pzero else noteBlock)
+                        <|> lineClump
+  docMinusKeys <- liftM concat $ manyTill firstPassParser eof
   setInput docMinusKeys
   setPosition startPos
-  st <- getState
-  -- go through again for notes unless strict...
-  if stateStrict st
-     then return ()
-     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
-                              return . concat
-             st' <- getState
-             let reversedNotes = stateNotes st'
-             updateState $ \s -> s { stateNotes = reverse reversedNotes }
-             setInput docMinusNotes
-             setPosition startPos
+  st' <- getState
+  let reversedNotes = stateNotes st'
+  updateState $ \s -> s { stateNotes = reverse reversedNotes }
   -- now parse it for real...
   (title, author, date) <- option ([],[],"") titleBlock
   blocks <- parseBlocks
@@ -243,9 +238,7 @@ noteBlock = try $ do
   raw <- sepBy rawLines (try (blankline >> indentSpaces))
   optional blanklines
   endPos <- getPosition
-  -- parse the extracted text, which may contain various block elements:
-  contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
-  let newnote = (ref, contents)
+  let newnote = (ref, (intercalate "\n" raw) ++ "\n\n")
   st <- getState
   let oldnotes = stateNotes st
   updateState $ \s -> s { stateNotes = newnote : oldnotes }
@@ -1174,8 +1167,8 @@ note = try $ do
   state <- getState
   let notes = stateNotes state
   case lookup ref notes of
-       Nothing       -> fail "note not found"
-       Just contents -> return $ Note contents
+       Nothing  -> fail "note not found"
+       Just raw -> liftM Note $ parseFromString parseBlocks raw
 
 inlineNote :: GenParser Char ParserState Inline
 inlineNote = try $ do
```