From c2d3796439d56dff6c72e9b961d080a52cf634b9 Mon Sep 17 00:00:00 2001 From: Nathan Gass Date: Mon, 13 Dec 2010 21:32:04 +0100 Subject: Added support for latex cite commands in latex reader. --- src/Text/Pandoc/Readers/LaTeX.hs | 117 ++++++++++++++++++++++++++++++++++++--- tests/latex-reader.native | 8 +-- 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0bc13d2dd..f02d7a564 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe ) -import Data.Char ( chr ) +import Data.Char ( chr, toUpper ) import Data.List ( isPrefixOf, isSuffixOf ) import Control.Monad ( when ) @@ -50,7 +50,7 @@ readLaTeX = readWith parseLaTeX -- characters with special meaning specialChars :: [Char] -specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" +specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-" -- -- utility functions @@ -150,7 +150,7 @@ block = choice [ hrule , header , list , blockQuote - , comment + , commentBlock , bibliographic , para , itemBlock @@ -433,11 +433,10 @@ unknownCommand = try $ do else return $ Plain [Str $ concat args] commandsToIgnore :: [String] -commandsToIgnore = ["special","pdfannot","pdfstringdef"] +commandsToIgnore = ["special","pdfannot","pdfstringdef","bibliography"] --- latex comment -comment :: GenParser Char st Block -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null +commentBlock :: GenParser Char st Block +commentBlock = comment >> return Null -- -- inline @@ -469,12 +468,19 @@ inline = choice [ str , linebreak , accentedChar , nonbreakingSpace + , cite , specialChar , rawLaTeXInline' , escapedChar , unescapedChar + , comment ] "inline" + +-- latex comment +comment :: GenParser Char st Inline +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "") + accentedChar :: GenParser Char st Inline accentedChar = normalAccentedChar <|> specialAccentedChar @@ -564,7 +570,7 @@ escapedChar = do -- nonescaped special characters unescapedChar :: GenParser Char st Inline -unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c]) +unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c]) specialChar :: GenParser Char st Inline specialChar = choice [ spacer, interwordSpace, @@ -788,6 +794,101 @@ footnote = try $ do setInput rest return $ Note blocks +-- | citations +cite :: GenParser Char ParserState Inline +cite = simpleCite <|> complexNatbibCites + +simpleCiteArgs :: GenParser Char ParserState [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ (char '[') >> manyTill inline (char ']') + second <- optionMaybe $ (char '[') >> manyTill inline (char ']') + char '{' + keys <- many1Till citationLabel (char '}') + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> ([], s ) + (Just s , Just t ) -> (s , t ) + _ -> ([], []) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + + +simpleCite :: GenParser Char ParserState Inline +simpleCite = try $ do + char '\\' + let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]] + ++ ["footcitetext"] + normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]] + ++ biblatex + supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"] + intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]] + mintext = ["textcites"] + mnormal = map (++ "s") biblatex + cmdend = notFollowedBy (letter <|> char '*') + addUpper xs = xs ++ map (\(c:cs) -> toUpper c : cs) xs + toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t + (mode, multi) <- toparser normal (NormalCitation, False) + <|> toparser supress (SuppressAuthor, False) + <|> toparser intext (AuthorInText , False) + <|> toparser mnormal (NormalCitation, True ) + <|> toparser mintext (AuthorInText , True ) + cits <- if multi then + many1 simpleCiteArgs + else + simpleCiteArgs >>= \c -> return [c] + let (c:cs) = concat cits + cits' = case mode of + AuthorInText -> c {citationMode = mode} : cs + _ -> map (\a -> a {citationMode = mode}) (c:cs) + return $ Cite cits' [] + +complexNatbibCites :: GenParser Char ParserState Inline +complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical + +complexNatbibTextual :: GenParser Char ParserState Inline +complexNatbibTextual = try $ do + string "\\citeauthor{" + manyTill (noneOf "}") (char '}') + skipSpaces + Cite (c:cs) _ <- complexNatbibParenthetical + return $ Cite (c {citationMode = AuthorInText} : cs) [] + + +complexNatbibParenthetical :: GenParser Char ParserState Inline +complexNatbibParenthetical = try $ do + string "\\citetext{" + cits <- many1Till parseOne (char '}') + return $ Cite (concat cits) [] + where + parseOne = do + skipSpaces + pref <- many (notFollowedBy (oneOf "\\}") >> inline) + (Cite cites _) <- simpleCite + suff <- many (notFollowedBy (oneOf "\\};") >> inline) + skipSpaces + optional $ char ';' + return $ addPrefix pref $ addSuffix suff $ cites + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +citationLabel :: GenParser Char ParserState String +citationLabel = do + res <- many1 $ noneOf ",}" + optional $ char ',' + return $ removeLeadingTrailingSpace res + -- | Parse any LaTeX inline command and return it in a raw TeX inline element. rawLaTeXInline' :: GenParser Char ParserState Inline rawLaTeXInline' = do diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 921bf9d77..80cd7f4aa 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -272,7 +272,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA , HorizontalRule , Header 1 [Str "LaTeX"] , BulletList - [ [ Para [TeX "\\cite[22-23]{smith.1899}"] ] + [ [ Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22",Str "-",Str "23"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] []] ] , [ Para [TeX "\\doublespacing"] ] , [ Para [Math InlineMath "2+2=4"] ] , [ Para [Math InlineMath "x \\in y"] ] @@ -334,12 +334,12 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA , Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] , Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] , Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] -, Para [Str "With",Space,Link [Str "embedded",Space,Str "[brackets]"] ("/url/",""),Str "."] +, Para [Str "With",Space,Link [Str "embedded",Space,Str "[",Str "brackets",Str "]"] ("/url/",""),Str "."] , Para [Link [Str "b"] ("/url/",""),Space,Str "by",Space,Str "itself",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "link."] , Para [Str "Indented",Space,Link [Str "once"] ("/url",""),Str "."] , Para [Str "Indented",Space,Link [Str "twice"] ("/url",""),Str "."] , Para [Str "Indented",Space,Link [Str "thrice"] ("/url",""),Str "."] -, Para [Str "This",Space,Str "should",Space,Str "[not][]",Space,Str "be",Space,Str "a",Space,Str "link."] +, Para [Str "This",Space,Str "should",Space,Str "[",Str "not",Str "]",Str "[",Str "]",Space,Str "be",Space,Str "a",Space,Str "link."] , CodeBlock ("",[],[]) "[not]: /url" , Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] , Para [Str "Foo",Space,Link [Str "biz"] ("/url/",""),Str "."] @@ -367,7 +367,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA , Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "image"] ("movie.jpg",""),Space,Str "icon."] , HorizontalRule , Header 1 [Str "Footnotes"] -, Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Note [Para [Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "after",Space,Str "the",Space,Str "footnote",Space,Str "reference.",Space,Str "It",Space,Str "need",Space,Str "not",Space,Str "be",Space,Str "placed",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]],Space,Str "and",Space,Str "another.",Note [Para [Str "Here",Apostrophe,Str "s",Space,Str "the",Space,Str "long",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."],Para [Str "Subsequent",Space,Str "blocks",Space,Str "are",Space,Str "indented",Space,Str "to",Space,Str "show",Space,Str "that",Space,Str "they",Space,Str "belong",Space,Str "to",Space,Str "the",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "list",Space,Str "items)."],CodeBlock ("",[],[]) " { }",Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "indent",Space,Str "every",Space,Str "line,",Space,Str "but",Space,Str "you",Space,Str "can",Space,Str "also",Space,Str "be",Space,Str "lazy",Space,Str "and",Space,Str "just",Space,Str "indent",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "each",Space,Str "block."]],Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space.[",Str "^",Str "my",Space,Str "note]",Space,Str "Here",Space,Str "is",Space,Str "an",Space,Str "inline",Space,Str "note.",Note [Para [Str "This",Space,Str "is",Space,Emph [Str "easier"],Space,Str "to",Space,Str "type.",Space,Str "Inline",Space,Str "notes",Space,Str "may",Space,Str "contain",Space,Link [Str "links"] ("http://google.com",""),Space,Str "and",Space,Code "]",Space,Str "verbatim",Space,Str "characters,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "[bracketed",Space,Str "text]."]]] +, Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Note [Para [Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "after",Space,Str "the",Space,Str "footnote",Space,Str "reference.",Space,Str "It",Space,Str "need",Space,Str "not",Space,Str "be",Space,Str "placed",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]],Space,Str "and",Space,Str "another.",Note [Para [Str "Here",Apostrophe,Str "s",Space,Str "the",Space,Str "long",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."],Para [Str "Subsequent",Space,Str "blocks",Space,Str "are",Space,Str "indented",Space,Str "to",Space,Str "show",Space,Str "that",Space,Str "they",Space,Str "belong",Space,Str "to",Space,Str "the",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "list",Space,Str "items)."],CodeBlock ("",[],[]) " { }",Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "indent",Space,Str "every",Space,Str "line,",Space,Str "but",Space,Str "you",Space,Str "can",Space,Str "also",Space,Str "be",Space,Str "lazy",Space,Str "and",Space,Str "just",Space,Str "indent",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "each",Space,Str "block."]],Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space.",Str "[",Str "^",Str "my",Space,Str "note",Str "]",Space,Str "Here",Space,Str "is",Space,Str "an",Space,Str "inline",Space,Str "note.",Note [Para [Str "This",Space,Str "is",Space,Emph [Str "easier"],Space,Str "to",Space,Str "type.",Space,Str "Inline",Space,Str "notes",Space,Str "may",Space,Str "contain",Space,Link [Str "links"] ("http://google.com",""),Space,Str "and",Space,Code "]",Space,Str "verbatim",Space,Str "characters,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "[",Str "bracketed",Space,Str "text",Str "]",Str "."]]] , BlockQuote [ Para [Str "Notes",Space,Str "can",Space,Str "go",Space,Str "in",Space,Str "quotes.",Note [Para [Str "In",Space,Str "quote."]]] ] -- cgit v1.2.3