From aaf7de0ddaea292ba4e869a6f0fa5adaaf02b813 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Nov 2010 12:38:45 -0800 Subject: Markdown reader: Revised parser for new citation syntax. Suffixes and prefixes are now [Inline]. The locator is separated from the citation key by a blank space. The locator consists of one introductory word and any number of words containing at least one digit. The suffix, if any, is separated from the locator by a comma, and continues til the end of the citation. --- src/Text/Pandoc/Readers/Markdown.hs | 129 ++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 56 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4975ee02f..b0aab9c70 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -34,7 +34,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, isDigit ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -1309,15 +1309,25 @@ rawHtmlInline' = do cite :: GenParser Char ParserState Inline cite = do failIfStrict - textualCite <|> normalCite + citations <- textualCite <|> normalCite + return $ Cite citations [] spnl :: GenParser Char st () -spnl = try $ skipSpaces >> optional newline >> skipSpaces >> - notFollowedBy (char '\n') +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') -textualCite :: GenParser Char ParserState Inline +blankSpace :: GenParser Char st () +blankSpace = try $ do + res <- many1 $ oneOf " \t\n" + guard $ length res > 0 + guard $ length (filter (=='\n') res) <= 1 + +textualCite :: GenParser Char ParserState [Citation] textualCite = try $ do - key <- citeKey + (_, key) <- citeKey st <- getState unless (key `elem` stateCitations st) $ fail "not a citation" @@ -1329,73 +1339,80 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - option (Cite [first] []) $ try $ do - spnl - char '[' - spnl - bareloc <- option "" $ notFollowedBy (oneOf "-@") >> locator - rest <- many $ try $ do - optional $ char ';' - spnl - citation - spnl - char ']' - let first' = if null bareloc - then first - else first{ citationLocator = bareloc - , citationMode = AuthorInText } - return $ Cite (first' : rest) [] - -normalCite :: GenParser Char ParserState Inline + rest <- option [] $ try $ spnl >> normalCite + if null rest + then option [first] $ bareloc first + else return $ first : rest + +bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc c = try $ do + spnl + char '[' + spnl + loc <- locator + spnl + rest <- option [] $ try $ char ';' >> citeList + spnl + char ']' + return $ c{ citationLocator = loc } : rest + +normalCite :: GenParser Char ParserState [Citation] normalCite = try $ do - cites <- citeList - return $ Cite cites [] + char '[' + spnl + citations <- citeList + spnl + char ']' + return citations -citeKey :: GenParser Char st String +citeKey :: GenParser Char st (Bool, String) citeKey = try $ do + suppress_author <- option False (char '-' >> return True) char '@' first <- letter - rest <- many $ noneOf ",;]@ \t\n" - return (first:rest) + rest <- many $ (noneOf ",;]@ \t\n") + return (suppress_author, first:rest) locator :: GenParser Char st String locator = try $ do - optional $ char ',' spnl - many1 $ (char '\\' >> oneOf "];\n") <|> noneOf "];\n" <|> - (char '\n' >> notFollowedBy blankline >> return ' ') - -prefix :: GenParser Char st String -prefix = liftM removeLeadingTrailingSpace $ - many $ (char '\\' >> anyChar) <|> noneOf "-@]\n" <|> - (try $ char '-' >> notFollowedBy (char '@') >> return '-') <|> - (try $ char '\n' >> notFollowedBy blankline >> return ' ') - -citeList :: GenParser Char st [Citation] -citeList = try $ do - char '[' + w <- many1 (noneOf " \t\n;]") spnl - first <- citation + ws <- many locatorWord + return $ unwords $ w:ws + +locatorWord :: GenParser Char st String +locatorWord = try $ do + wd <- many1 $ (try $ char '\\' >> oneOf "]; \t\n") <|> noneOf "]; \t\n" spnl - rest <- many $ try $ do - char ';' - spnl - citation + if any isDigit wd + then return wd + else pzero + +suffix :: GenParser Char ParserState [Inline] +suffix = try $ do + char ',' spnl - char ']' - return (first:rest) + liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + +prefix :: GenParser Char ParserState [Inline] +prefix = liftM normalizeSpaces $ + manyTill inline (lookAhead citeKey) + +citeList :: GenParser Char ParserState [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char st Citation +citation :: GenParser Char ParserState Citation citation = try $ do pref <- prefix - suppress_auth <- option False (char '-' >> return True) - key <- citeKey - loc <- option "" locator + (suppress_author, key) <- citeKey + loc <- option "" $ try $ blankSpace >> locator + suff <- option [] suffix return $ Citation{ citationId = key - , citationPrefix = if pref /= [] then [Str pref] else [] - , citationSuffix = [] + , citationPrefix = pref + , citationSuffix = suff , citationLocator = loc - , citationMode = if suppress_auth + , citationMode = if suppress_author then SuppressAuthor else NormalCitation , citationNoteNum = 0 -- cgit v1.2.3