From fc24c79db6ca54bfd5e2a99e12421d7749514faa Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 6 Sep 2008 18:05:18 +0000 Subject: LaTeX reader: improvements in raw LaTeX parsing. + "loose punctuation" (like {}) parsed as Space + Para elements must contain more than Str "" and Space elements + Added parser for "\ignore" command used in literate haskell. + Reworked unknownCommand and rawLaTeXInline: when not in "parse raw" mode, these parsers simply strip off the command part and allow the arguments to be parsed normally. So, for example, \blorg{\emph{hi}} will be parsed as Emph "hi" rather than Str "{\\emph{hi}}". git-svn-id: https://pandoc.googlecode.com/svn/trunk@1420 788f1e2b-df1e-0410-8736-df70ead52e1b --- Text/Pandoc/Readers/LaTeX.hs | 61 +++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 20 deletions(-) (limited to 'Text/Pandoc/Readers') diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs index 647899acf..4cecebda1 100644 --- a/Text/Pandoc/Readers/LaTeX.hs +++ b/Text/Pandoc/Readers/LaTeX.hs @@ -159,6 +159,7 @@ block = choice [ hrule , specialEnvironment , itemBlock , unknownEnvironment + , ignore , unknownCommand ] "block" -- @@ -283,7 +284,12 @@ definitionList = try $ do -- para :: GenParser Char ParserState Block -para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces +para = do + res <- many1 inline + spaces + return $ if null (filter (`notElem` [Str "", Space]) res) + then Null + else Para $ normalizeSpaces res -- -- title authors date @@ -331,7 +337,7 @@ itemBlock :: GenParser Char ParserState Block itemBlock = try $ do ("item", _, args) <- command state <- getState - if (stateParserContext state == ListItemState) + if stateParserContext state == ListItemState then fail "item should be handled by list block" else if null args then return Null @@ -381,20 +387,33 @@ unknownEnvironment = try $ do else anyEnvironment -- otherwise just the contents return result +-- \ignore{} is used conventionally in literate haskell for definitions +-- that are to be processed by the compiler but not printed. +ignore :: GenParser Char ParserState Block +ignore = try $ do + ("ignore", _, _) <- command + spaces + return Null + unknownCommand :: GenParser Char ParserState Block unknownCommand = try $ do - notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", + notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", "document"] - (name, star, args) <- command - spaces - let argStr = concat args state <- getState - if name == "item" && (stateParserContext state) == ListItemState - then fail "should not be parsed as raw" - else return "" if stateParseRaw state - then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] - else return $ Plain [Str (joinWithSep " " args)] + then do + (name, star, args) <- command + spaces + if name == "item" && stateParserContext state == ListItemState + then fail "should not be parsed as raw" + else return "" + return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)] + else do -- skip unknown command, leaving arguments to be parsed + char '\\' + letter + many (letter <|> digit) + spaces + return Null -- latex comment comment :: GenParser Char st Block @@ -523,9 +542,9 @@ escapedChar = do result <- escaped (oneOf " $%&_#{}\n") return $ if result == Str "\n" then Str " " else result --- ignore standalone, nonescaped special characters +-- treat nonescaped special characters as spaces unescapedChar :: GenParser Char st Inline -unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "") +unescapedChar = oneOf "`$^&_#{}|<>" >> return Space specialChar :: GenParser Char st Inline specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] @@ -727,12 +746,14 @@ footnote = try $ do -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline = try $ do - (name, star, args) <- command + notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"] state <- getState - if ((name == "begin") || (name == "end") || (name == "item")) - then fail "not an inline command" - else string "" if stateParseRaw state - then return $ TeX ("\\" ++ name ++ star ++ concat args) - else return $ Str (joinWithSep " " args) - + then do + (name, star, args) <- command + return $ TeX ("\\" ++ name ++ star ++ concat args) + else do -- skip unknown command, leaving arguments to be parsed + char '\\' + letter + many (letter <|> digit) + return $ Str "" -- cgit v1.2.3