From fc24c79db6ca54bfd5e2a99e12421d7749514faa Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Sat, 6 Sep 2008 18:05:18 +0000
Subject: LaTeX reader:  improvements in raw LaTeX parsing.

+ "loose punctuation" (like {}) parsed as Space
+ Para elements must contain more than Str "" and Space elements
+ Added parser for "\ignore" command used in literate haskell.
+ Reworked unknownCommand and rawLaTeXInline: when not in "parse raw"
  mode, these parsers simply strip off the command part and allow
  the arguments to be parsed normally.  So, for example,
  \blorg{\emph{hi}} will be parsed as Emph "hi" rather than
  Str "{\\emph{hi}}".


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1420 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Text/Pandoc/Readers/LaTeX.hs | 61 +++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 20 deletions(-)

(limited to 'Text')

diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs
index 647899acf..4cecebda1 100644
--- a/Text/Pandoc/Readers/LaTeX.hs
+++ b/Text/Pandoc/Readers/LaTeX.hs
@@ -159,6 +159,7 @@ block = choice [ hrule
                , specialEnvironment
                , itemBlock
                , unknownEnvironment
+               , ignore
                , unknownCommand ] <?> "block"
 
 --
@@ -283,7 +284,12 @@ definitionList = try $ do
 --
 
 para :: GenParser Char ParserState Block
-para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
+para = do
+  res <- many1 inline
+  spaces
+  return $ if null (filter (`notElem` [Str "", Space]) res)
+              then Null
+              else Para $ normalizeSpaces res
 
 --
 -- title authors date
@@ -331,7 +337,7 @@ itemBlock :: GenParser Char ParserState Block
 itemBlock = try $ do
   ("item", _, args) <- command
   state <- getState
-  if (stateParserContext state == ListItemState)
+  if stateParserContext state == ListItemState
      then fail "item should be handled by list block"
      else if null args 
              then return Null
@@ -381,20 +387,33 @@ unknownEnvironment = try $ do
                else anyEnvironment      -- otherwise just the contents
   return result
 
+-- \ignore{} is used conventionally in literate haskell for definitions
+-- that are to be processed by the compiler but not printed.
+ignore :: GenParser Char ParserState Block
+ignore = try $ do
+  ("ignore", _, _) <- command
+  spaces
+  return Null
+
 unknownCommand :: GenParser Char ParserState Block
 unknownCommand = try $ do
-  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", 
+  notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
                                      "document"]
-  (name, star, args) <- command
-  spaces
-  let argStr = concat args
   state <- getState
-  if name == "item" && (stateParserContext state) == ListItemState
-     then fail "should not be parsed as raw"
-     else return ""
   if stateParseRaw state
-     then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
-     else return $ Plain [Str (joinWithSep " " args)]
+     then do
+        (name, star, args) <- command
+        spaces
+        if name == "item" && stateParserContext state == ListItemState
+           then fail "should not be parsed as raw"
+           else return ""
+        return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
+     else do -- skip unknown command, leaving arguments to be parsed
+        char '\\'
+        letter
+        many (letter <|> digit)
+        spaces
+        return Null
 
 -- latex comment
 comment :: GenParser Char st Block
@@ -523,9 +542,9 @@ escapedChar = do
   result <- escaped (oneOf " $%&_#{}\n")
   return $ if result == Str "\n" then Str " " else result
 
--- ignore standalone, nonescaped special characters
+-- treat nonescaped special characters as spaces
 unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
+unescapedChar = oneOf "`$^&_#{}|<>" >> return Space
 
 specialChar :: GenParser Char st Inline
 specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
@@ -727,12 +746,14 @@ footnote = try $ do
 -- | Parse any LaTeX command and return it in a raw TeX inline element.
 rawLaTeXInline :: GenParser Char ParserState Inline
 rawLaTeXInline = try $ do
-  (name, star, args) <- command
+  notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
   state <- getState
-  if ((name == "begin") || (name == "end") || (name == "item"))
-     then fail "not an inline command" 
-     else string ""
   if stateParseRaw state
-     then return $ TeX ("\\" ++ name ++ star ++ concat args)
-     else return $ Str (joinWithSep " " args)
-
+     then do
+        (name, star, args) <- command
+        return $ TeX ("\\" ++ name ++ star ++ concat args)
+     else do -- skip unknown command, leaving arguments to be parsed
+        char '\\'
+        letter
+        many (letter <|> digit)
+        return $ Str ""
-- 
cgit v1.2.3