diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 48 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 39 |
2 files changed, 54 insertions, 33 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 82c61985a..aa2d9d14d 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -140,26 +140,24 @@ titleBlock = try $ do parseMarkdown = do -- markdown allows raw HTML updateState (\state -> state { stateParseRaw = True }) - (title, author, date) <- option ([],[],"") titleBlock + startPos <- getPosition -- go through once just to get list of reference keys - refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof - let keys = map (\(KeyBlock label target) -> (label, target)) $ - filter isKeyBlock refs - -- strip out keys - setInput $ concatMap (\(LineClump ln) -> ln) $ filter isLineClump refs - updateState (\state -> state { stateKeys = keys }) + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- many (referenceKey <|> lineClump) >>= return . concat + setInput docMinusKeys + setPosition startPos st <- getState + -- go through again for notes unless strict... if stateStrict st then return () - else do -- go through for notes (which may contain refs - hence 2nd pass) - refs' <- manyTill (noteBlock <|> - (lineClump >>= return . LineClump)) eof - let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ - filter isNoteBlock refs' - updateState (\state -> state { stateNotes = notes }) - setInput $ concatMap (\(LineClump ln) -> ln) $ - filter isLineClump refs' - -- go through again, with note blocks and keys stripped out + else do docMinusNotes <- many (noteBlock <|> lineClump) >>= return . concat + st <- getState + let reversedNotes = stateNotes st + updateState $ \st -> st { stateNotes = reverse reversedNotes } + setInput docMinusNotes + setPosition startPos + -- now parse it for real... + (title, author, date) <- option ([],[],"") titleBlock blocks <- parseBlocks return $ Pandoc (Meta title author date) $ filter (/= Null) blocks @@ -168,6 +166,7 @@ parseMarkdown = do -- referenceKey = try $ do + startPos <- getPosition nonindentSpaces label <- reference char ':' @@ -177,7 +176,13 @@ referenceKey = try $ do optional (char '>') tit <- option "" referenceTitle blanklines - return $ KeyBlock label (removeTrailingSpace src, tit) + endPos <- getPosition + let newkey = (label, (removeTrailingSpace src, tit)) + st <- getState + let oldkeys = stateKeys st + updateState $ \st -> st { stateKeys = newkey : oldkeys } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' referenceTitle = try $ do skipSpaces @@ -201,15 +206,22 @@ rawLine = do rawLines = many1 rawLine >>= return . concat noteBlock = try $ do + startPos <- getPosition ref <- noteMarker char ':' optional blankline optional indentSpaces raw <- sepBy rawLines (try (blankline >> indentSpaces)) optional blanklines + endPos <- getPosition -- parse the extracted text, which may contain various block elements: contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" - return $ NoteBlock ref contents + let newnote = (ref, contents) + st <- getState + let oldnotes = stateNotes st + updateState $ \st -> st { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- -- parsing blocks diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index be55ae4c3..0103087a5 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -83,15 +83,17 @@ titleTransform ((Header 1 head1):rest) = -- title, no subtitle titleTransform blocks = (blocks, []) parseRST = do - -- first pass: get keys - refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof - let keys = map (\(KeyBlock label target) -> (label, target)) $ - filter isKeyBlock refs - -- second pass, with keys stripped out - let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines - updateState (\state -> state { stateKeys = keys }) - blocks <- parseBlocks + startPos <- getPosition + -- go through once just to get list of reference keys + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- many (referenceKey <|> lineClump) >>= return . concat + setInput docMinusKeys + setPosition startPos + st <- getState + let reversedKeys = stateKeys st + updateState $ \st -> st { stateKeys = reverse reversedKeys } + -- now parse it for real... + blocks <- parseBlocks let blocks' = filter (/= Null) blocks state <- getState let (blocks'', title) = if stateStandalone state @@ -429,9 +431,16 @@ unknownDirective = try $ do -- reference key -- -referenceKey = - choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] >>~ +referenceKey = do + startPos <- getPosition + key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] + st <- getState + let oldkeys = stateKeys st + updateState $ \st -> st { stateKeys = key : oldkeys } optional blanklines + endPos <- getPosition + -- return enough blanks to replace key + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' targetURI = do skipSpaces @@ -447,26 +456,26 @@ imageKey = try $ do skipSpaces string "image::" src <- targetURI - return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI state <- getState - return $ KeyBlock [Str "_"] (removeLeadingTrailingSpace src, "") + return ([Str "_"], (removeLeadingTrailingSpace src, "")) regularKeyQuoted = try $ do string ".. _`" ref <- manyTill inline (char '`') char ':' src <- targetURI - return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) regularKey = try $ do string ".. _" ref <- manyTill inline (char ':') src <- targetURI - return $ KeyBlock (normalizeSpaces ref) (removeLeadingTrailingSpace src, "") + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) -- -- inline |