Complete rewrite of LaTeX reader.

* The new reader is more robust, accurate, and extensible. It is still quite incomplete, but it should be easier now to add features. * Text.Pandoc.Parsing: Added withRaw combinator. * Markdown reader: do escapedChar before raw latex inline. Otherwise we capture commands like \{. * Fixed latex citation tests for new citeproc. * Handle \include{} commands in latex. This is done in pandoc.hs, not the (pure) latex reader. But the reader exports the needed function, handleIncludes. * Moved err and warn from pandoc.hs to Shared. * Fixed tests - raw tex should sometimes have trailing space. * Updated lhs-test for highlighting-kate changes.
author: John MacFarlane <fiddlosopher@gmail.com> 2012-01-29 23:54:00 -0800
committer: John MacFarlane <fiddlosopher@gmail.com> 2012-02-04 09:56:43 -0800
commit: 75485c2f112cdc2e1f95f871d01cc356510166ae (patch)
tree: 2d495f048bd887403db41418c14445b54af1b00c /src/Text/Pandoc/Readers
parent: 23ca68a5c41ef68397ed2217e31bf6e720fb0534 (diff)
download: pandoc-75485c2f112cdc2e1f95f871d01cc356510166ae.tar.gz
2 files changed, 737 insertions, 981 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3d9689168..9eb9eb2f9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
+   Copyright   : Copyright (C) 2006-2012 John MacFarlane
+   License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
@@ -27,20 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of LaTeX to 'Pandoc' document.
 -}
-module Text.Pandoc.Readers.LaTeX ( 
-                                  readLaTeX,
-                                  rawLaTeXInline,
-                                  rawLaTeXEnvironment'
+module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+                                   rawLaTeXInline,
+                                   rawLaTeXBlock,
+                                   handleIncludes
                                  ) where
 
-import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr, toUpper )
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Data.Char ( chr, ord )
 import Control.Monad
+import Text.Pandoc.Builder
+import Data.Char (isLetter)
+import Control.Applicative
+import Data.Monoid
+import System.FilePath (replaceExtension)
+import qualified Data.Map as M
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
 readLaTeX :: ParserState   -- ^ Parser state, including options for parser
@@ -48,1002 +52,757 @@ readLaTeX :: ParserState   -- ^ Parser state, including options for parser
           -> Pandoc
 readLaTeX = readWith parseLaTeX
 
--- characters with special meaning
-specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText :: Char -> Char -> GenParser Char st [Char]
-bracketedText openB closeB = do
-  result <- charsInBalanced openB closeB anyChar
-  return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg :: GenParser Char st [Char]
-optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
-
--- | True if the string begins with '{'.
-isArg :: [Char] -> Bool
-isArg ('{':_) = True
-isArg _       = False
+parseLaTeX :: LP Pandoc
+parseLaTeX = do
+  bs <- blocks
+  eof
+  st <- getState
+  let title' = stateTitle st
+  let authors' = stateAuthors st
+  let date' = stateDate st
+  return $ Pandoc (Meta title' authors' date') $ toList bs
 
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs :: GenParser Char st [[Char]]
-commandArgs = many optOrArg
+type LP = GenParser Char ParserState
 
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command :: GenParser Char st ([Char], [Char], [[Char]])
-command = do
+anyControlSeq :: LP String
+anyControlSeq = do
   char '\\'
-  name <- many1 letter
-  star <- option "" (string "*")  -- some commands have starred versions
-  args <- commandArgs
-  return (name, star, args)
-
-begin :: [Char] -> GenParser Char st [Char]
-begin name = try $ do
-  string "\\begin"
-  spaces
-  char '{'
-  string name
-  char '}'
-  optional commandArgs
-  spaces
+  next <- option '\n' anyChar
+  name <- case next of
+               '\n'           -> return ""
+               c | isLetter c -> (c:) <$> (many letter <* optional sp)
+                 | otherwise  -> return [c]
   return name
 
-end :: [Char] -> GenParser Char st [Char]
-end name = try $ do
-  string "\\end"
-  spaces
-  char '{'
-  string name
-  char '}'
+controlSeq :: String -> LP String
+controlSeq name = try $ do
+  char '\\'
+  case name of
+        ""   -> mzero
+        [c] | not (isLetter c) -> string [c]
+        cs   -> string cs <* optional sp
   return name
 
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment :: [Char] -> GenParser Char ParserState [Block]
-environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
-
-anyEnvironment :: GenParser Char ParserState Block
-anyEnvironment =  try $ do
-  string "\\begin"
-  spaces
+sp :: LP ()
+sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+        <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+tildeEscape :: LP Char
+tildeEscape = try $ do
+  string "^^"
+  c <- satisfy (\x -> x >= '\0' && x <= '\128')
+  d <- if isLowerHex c
+          then option "" $ count 1 (satisfy isLowerHex)
+          else return ""
+  if null d
+     then case ord c of
+           x | x >= 64 && x <= 127 -> return $ chr (x - 64)
+             | otherwise           -> return $ chr (x + 64)
+     else return $ chr $ read ('0':'x':c:d)
+
+comment :: LP ()
+comment = do
+  char '%'
+  skipMany (satisfy (/='\n'))
+  newline
+  return ()
+
+grouped :: Monoid a => LP a -> LP a
+grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}'))
+
+braced :: LP String
+braced = char '{' *> (concat <$> manyTill
+         (  many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
+        <|> try (string "\\}")
+        <|> try (string "\\{")
+        <|> ((\x -> "{" ++ x ++ "}") <$> braced)
+        <|> count 1 anyChar
+         ) (char '}'))
+
+bracketed :: Monoid a => LP a -> LP a
+bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
+
+trim :: String -> String
+trim = removeLeadingTrailingSpace
+
+mathDisplay :: LP String -> LP Inlines
+mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
+
+mathInline :: LP String -> LP Inlines
+mathInline p = math <$> (try p >>= applyMacros')
+
+double_quote :: LP Inlines
+double_quote = (doubleQuoted . mconcat) <$>
+  (try $ string "``" *> manyTill inline (try $ string "''"))
+
+single_quote :: LP Inlines
+single_quote = (singleQuoted . mconcat) <$>
+  (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
+
+inline :: LP Inlines
+inline = (mempty <$ comment)
+     <|> (space  <$ sp)
+     <|> inlineText
+     <|> inlineCommand
+     <|> grouped inline
+     <|> (char '-' *> option (str "-")
+           ((char '-') *> option (str "–") (str "—" <$ char '-')))
+     <|> double_quote
+     <|> single_quote
+     <|> (str "’" <$ char '\'')
+     <|> (str "\160" <$ char '~')
+     <|> (mathDisplay $ string "$$" *> manyTill anyChar (try $ string "$$"))
+     <|> (mathInline  $ char '$' *> manyTill anyChar (char '$'))
+     <|> (superscript <$> (char '^' *> tok))
+     <|> (subscript <$> (char '_' *> tok))
+     <|> (failUnlessLHS *> char '|' *> doLHSverb)
+     <|> (str <$> count 1 tildeEscape)
+     <|> (str <$> string "]")
+     <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
+
+inlines :: LP Inlines
+inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+
+block :: LP Blocks
+block = (mempty <$ comment)
+    <|> (mempty <$ ((spaceChar <|> blankline) *> spaces))
+    <|> environment
+    <|> mempty <$ macro -- TODO improve macros, make them work everywhere
+    <|> blockCommand
+    <|> grouped block
+    <|> paragraph
+
+
+blocks :: LP Blocks
+blocks = mconcat <$> many block
+
+blockCommand :: LP Blocks
+blockCommand = try $ do
+  name <- anyControlSeq
+  star <- option "" (string "*" <* optional sp)
+  let name' = name ++ star
+  case M.lookup name' blockCommands of
+       Just p      -> p
+       Nothing     -> case M.lookup name blockCommands of
+                           Just p    -> p
+                           Nothing   -> mzero
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = (str "[") <> x <> (str "]")
+
+blockCommands :: M.Map String (LP Blocks)
+blockCommands = M.fromList
+  [ ("par", pure mempty)
+  , ("title", mempty <$ (tok >>= addTitle))
+  , ("subtitle", mempty <$ (tok >>= addSubtitle))
+  , ("author", mempty <$ authors)
+  , ("date", mempty <$ (tok >>= addDate))
+  , ("maketitle", pure mempty)
+  -- \ignore{} is used conventionally in literate haskell for definitions
+  -- that are to be processed by the compiler but not printed.
+  , ("ignore", mempty <$ tok)
+  , ("hyperdef", mempty <$ (tok *> tok))
+  , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section 0)
+  , ("section", section 1)
+  , ("subsection", section 2)
+  , ("subsubsection", section 3)
+  , ("paragraph", section 4)
+  , ("subparagraph", section 5)
+  , ("opening", (para . trimInlines) <$> tok)
+  , ("closing", (para . trimInlines) <$> tok)
+  , ("rule", optional opt *> tok *> tok *> pure horizontalRule)
+  , ("begin", mzero)   -- these are here so they won't be interpreted as inline
+  , ("end", mzero)
+  , ("item", loose_item)
+  , ("documentclass", optional opt *> braced *> preamble)
+  -- should be parsed by macro, but we need this
+  -- here so these aren't parsed as inline
+  , ("newcommand", mempty <$ (tok *> optional opt *> tok))
+  , ("renewcommand", mempty <$ (tok *> optional opt *> tok))
+  , ("newenvironment", mempty <$ (tok *> tok *> tok))
+  , ("renewenvironment", mempty <$ (tok *> tok *> tok))
+  , ("special", pure mempty)
+  , ("pdfannot", pure mempty)
+  , ("pdfstringdef", pure mempty)
+  , ("index", pure mempty)
+  , ("bibliography", pure mempty)
+  ]
+
+addTitle :: Inlines -> LP ()
+addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
+
+addSubtitle :: Inlines -> LP ()
+addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
+                        toList (str ":" <> linebreak <> tit) })
+
+authors :: LP ()
+authors = try $ do
   char '{'
-  name <- many letter
-  star <- option "" (string "*") -- some environments have starred variants
-  char '}'
-  optional commandArgs
-  spaces
-  contents <- manyTill block (end (name ++ star))
-  spaces
-  return $ BlockQuote contents
-
---
--- parsing documents
---
+  let oneAuthor = mconcat <$> many1 (notFollowedBy' (controlSeq "and") >> inline)
+  auths <- sepBy oneAuthor (controlSeq "and")
+  updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
+
+addDate :: Inlines -> LP ()
+addDate dat = updateState (\s -> s{ stateDate = toList dat })
+
+section :: Int -> LP Blocks
+section lvl = do
+  hasChapters <- stateHasChapters `fmap` getState
+  let lvl' = if hasChapters then lvl + 1 else lvl
+  optional sp
+  optional opt
+  contents <- grouped inline
+  return $ header lvl' contents
+
+inlineCommand :: LP Inlines
+inlineCommand = try $ do
+  name <- anyControlSeq
+  guard $ not $ isBlockCommand name
+  parseRaw <- stateParseRaw `fmap` getState
+  star <- option "" (string "*")
+  let name' = name ++ star
+  case M.lookup name' inlineCommands of
+       Just p      -> p
+       Nothing     -> case M.lookup name inlineCommands of
+                           Just p    -> p
+                           Nothing
+                             | parseRaw  ->
+                                (rawInline "latex" . (('\\':name') ++)) <$>
+                                 (withRaw (optional opt *> many braced)
+                                      >>= applyMacros' . snd)
+                             | otherwise -> return mempty
+
+isBlockCommand :: String -> Bool
+isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+
+inlineCommands :: M.Map String (LP Inlines)
+inlineCommands = M.fromList
+  [ ("emph", emph <$> tok)
+  , ("textit", emph <$> tok)
+  , ("textsc", smallcaps <$> tok)
+  , ("sout", strikeout <$> tok)
+  , ("textsuperscript", superscript <$> tok)
+  , ("textsubscript", subscript <$> tok)
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("textbf", strong <$> tok)
+  , ("ldots", lit "…")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("label", inBrackets <$> tok)
+  , ("ref", inBrackets <$> tok)
+  , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
+  , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
+  , ("ensuremath", mathInline $ braced)
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  -- old TeX commands
+  , ("em", emph <$> inlines)
+  , ("it", emph <$> inlines)
+  , ("sl", emph <$> inlines)
+  , ("bf", strong <$> inlines)
+  , ("rm", inlines)
+  , ("itshape", emph <$> inlines)
+  , ("slshape", emph <$> inlines)
+  , ("scshape", smallcaps <$> inlines)
+  , ("bfseries", strong <$> inlines)
+  , ("/", pure mempty) -- italic correction
+  , ("cc", lit "ç")
+  , ("cC", lit "Ç")
+  , ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("sect", lit "§")
+  , ("`", option (str "`") $ try $ tok >>= accent grave)
+  , ("'", option (str "'") $ try $ tok >>= accent acute)
+  , ("^", option (str "^") $ try $ tok >>= accent hat)
+  , ("~", option (str "~") $ try $ tok >>= accent circ)
+  , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+  , ("i", lit "i")
+  , ("\\", linebreak <$ optional (bracketed inline *> optional sp))
+  , (",", pure mempty)
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+  , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+  , ("verb", doverb)
+  , ("lstinline", doverb)
+  , ("texttt", (code . stringify . toList) <$> tok)
+  , ("url", (unescapeURL <$> braced) >>= \url ->
+       pure (link url "" (codeWith ("",["url"],[]) url)))
+  , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
+       tok >>= \lab ->
+         pure (link url "" lab))
+  , ("includegraphics", optional opt *> (unescapeURL <$> braced) >>=
+       (\src -> pure (image src "" (str "image"))))
+  , ("cite", citation NormalCitation False)
+  , ("citep", citation NormalCitation False)
+  , ("citep*", citation NormalCitation False)
+  , ("citeal", citation NormalCitation False)
+  , ("citealp", citation NormalCitation False)
+  , ("citealp*", citation NormalCitation False)
+  , ("autocite", citation NormalCitation False)
+  , ("footcite", citation NormalCitation False)
+  , ("parencite", citation NormalCitation False)
+  , ("supercite", citation NormalCitation False)
+  , ("footcitetext", citation NormalCitation False)
+  , ("citeyearpar", citation SuppressAuthor False)
+  , ("citeyear", citation SuppressAuthor False)
+  , ("autocite*", citation SuppressAuthor False)
+  , ("cite*", citation SuppressAuthor False)
+  , ("parencite*", citation SuppressAuthor False)
+  , ("textcite", citation AuthorInText False)
+  , ("citet", citation AuthorInText False)
+  , ("citet*", citation AuthorInText False)
+  , ("citealt", citation AuthorInText False)
+  , ("citealt*", citation AuthorInText False)
+  , ("textcites", citation AuthorInText True)
+  , ("cites", citation NormalCitation True)
+  , ("autocites", citation NormalCitation True)
+  , ("footcites", citation NormalCitation True)
+  , ("parencites", citation NormalCitation True)
+  , ("supercites", citation NormalCitation True)
+  , ("footcitetexts", citation NormalCitation True)
+  , ("Autocite", citation NormalCitation False)
+  , ("Footcite", citation NormalCitation False)
+  , ("Parencite", citation NormalCitation False)
+  , ("Supercite", citation NormalCitation False)
+  , ("Footcitetext", citation NormalCitation False)
+  , ("Citeyearpar", citation SuppressAuthor False)
+  , ("Citeyear", citation SuppressAuthor False)
+  , ("Autocite*", citation SuppressAuthor False)
+  , ("Cite*", citation SuppressAuthor False)
+  , ("Parencite*", citation SuppressAuthor False)
+  , ("Textcite", citation AuthorInText False)
+  , ("Textcites", citation AuthorInText True)
+  , ("Cites", citation NormalCitation True)
+  , ("Autocites", citation NormalCitation True)
+  , ("Footcites", citation NormalCitation True)
+  , ("Parencites", citation NormalCitation True)
+  , ("Supercites", citation NormalCitation True)
+  , ("Footcitetexts", citation NormalCitation True)
+  , ("citetext", complexNatbibCitation NormalCitation)
+  , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+                        complexNatbibCitation AuthorInText)
+                   <|> citation AuthorInText False)
+  ]
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+  where isEscapable '%' = True
+        isEscapable '#' = True
+        isEscapable _   = False
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+doverb :: LP Inlines
+doverb = do
+  marker <- anyChar
+  code <$> manyTill (satisfy (/='\n')) (char marker)
+
+doLHSverb :: LP Inlines
+doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+
+lit :: String -> LP Inlines
+lit = pure . str
+
+accent :: (Char -> Char) -> Inlines -> LP Inlines
+accent f ils =
+  case toList ils of
+       (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys)
+       []                -> mzero
+       _                 -> return ils
+
+grave :: Char -> Char
+grave 'A' = 'À'
+grave 'E' = 'È'
+grave 'I' = 'Ì'
+grave 'O' = 'Ò'
+grave 'U' = 'Ù'
+grave 'a' = 'à'
+grave 'e' = 'è'
+grave 'i' = 'ì'
+grave 'o' = 'ò'
+grave 'u' = 'ù'
+grave c   = c
+
+acute :: Char -> Char
+acute 'A' = 'Á'
+acute 'E' = 'É'
+acute 'I' = 'Í'
+acute 'O' = 'Ó'
+acute 'U' = 'Ú'
+acute 'a' = 'á'
+acute 'e' = 'é'
+acute 'i' = 'í'
+acute 'o' = 'ó'
+acute 'u' = 'ú'
+acute c = c
+
+hat :: Char -> Char
+hat 'A' = 'Â'
+hat 'E' = 'Ê'
+hat 'I' = 'Î'
+hat 'O' = 'Ô'
+hat 'U' = 'Û'
+hat 'a' = 'ã'
+hat 'e' = 'ê'
+hat 'i' = 'î'
+hat 'o' = 'ô'
+hat 'u' = 'û'
+hat c = c
+
+circ :: Char -> Char
+circ 'A' = 'Ã'
+circ 'O' = 'Õ'
+circ 'o' = 'õ'
+circ 'N' = 'Ñ'
+circ 'n' = 'ñ'
+circ c   = c
+
+umlaut :: Char -> Char
+umlaut 'A' = 'Ä'
+umlaut 'E' = 'Ë'
+umlaut 'I' = 'Ï'
+umlaut 'O' = 'Ö'
+umlaut 'U' = 'Ü'
+umlaut 'a' = 'ä'
+umlaut 'e' = 'ë'
+umlaut 'i' = 'ï'
+umlaut 'o' = 'ö'
+umlaut 'u' = 'ü'
+umlaut c = c
+
+tok :: LP Inlines
+tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+
+opt :: LP Inlines
+opt = bracketed inline <* optional sp
+
+inlineText :: LP Inlines
+inlineText = str <$> many1 inlineChar
+
+inlineChar :: LP Char
+inlineChar = satisfy $ \c ->
+  not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' ||
+       c == '&'  || c == '~' || c == '#' || c == '{' || c == '}' ||
+       c == '^'  || c == '\'' || c == '`' || c == '-' || c == ']' ||
+       c == ' '  || c == '\t' || c == '\n' )
+
+environment :: LP Blocks
+environment = do
+  controlSeq "begin"
+  name <- braced
+  parseRaw <- stateParseRaw `fmap` getState
+  let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+  case M.lookup name environments of
+       Just p      -> p
+       Nothing     -> if parseRaw
+                         then (rawBlock "latex" . addBegin) <$>
+                              (withRaw (env name blocks) >>= applyMacros' . snd)
+                         else env name blocks
+
+-- | Replace "include" commands with file contents.
+handleIncludes :: String -> IO String
+handleIncludes [] = return []
+handleIncludes ('\\':xs) =
+  case runParser include defaultParserState "input" ('\\':xs) of
+       Right (f, rest) -> do ys <- catch (readFile (replaceExtension f ".tex"))
+                                    (\e -> warn
+                                      ("could not open included file `" ++
+                                       f ++ "': " ++ show e) >> return "")
+                             (ys ++) `fmap` handleIncludes rest
+       _  -> case runParser verbatimEnv defaultParserState "input" ('\\':xs) of
+                    Right (r, rest) -> (r ++) `fmap` handleIncludes rest
+                    _               -> ('\\':) `fmap` handleIncludes xs
+handleIncludes (x:xs) = (x:) `fmap` handleIncludes xs
+
+include :: LP (FilePath, String)
+include = do
+  controlSeq "include"
+  f <- braced
+  rest <- getInput
+  return (f, rest)
+
+verbatimEnv :: LP (String, String)
+verbatimEnv = do
+  (_,r) <- withRaw $ do
+             controlSeq "begin"
+             name <- braced
+             guard $ name == "verbatim" || name == "Verbatim" ||
+                     name == "lstlisting"
+             verbEnv name
+  rest <- getInput
+  return (r,rest)
 
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = do
-  try $ string "\\documentclass"
-  skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
+-- | Parse any LaTeX environment and return a string containing
+-- the whole literal environment as raw TeX.
+rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock =
+  (rawLaTeXEnvironment <|> (snd <$> withRaw blockCommand)) >>= applyMacros'
 
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX :: GenParser Char ParserState Pandoc
-parseLaTeX = do
-  spaces
-  skipMany $ comment >> spaces
-  blocks <- try (processLaTeXPreamble >> environment "document")
-           <|> (many block >>~ (spaces >> eof))
-  state <- getState
-  let blocks' = filter (/= Null) blocks
-  let title' = stateTitle state
-  let authors' = stateAuthors state
-  let date' = stateDate state
-  return $ Pandoc (Meta title' authors' date')  blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> many block
-
-block :: GenParser Char ParserState Block
-block = choice [ hrule
-               , codeBlock
-               , header
-               , list
-               , blockQuote
-               , simpleTable
-               , commentBlock
-               , macro
-               , bibliographic
-               , para
-               , itemBlock
-               , unknownEnvironment
-               , ignore
-               , unknownCommand
-               ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = section <|> chapter
-
-chapter :: GenParser Char ParserState Block
-chapter = try $ do
-  string "\\chapter"
-  result <- headerWithLevel 1
-  updateState $ \s -> s{ stateHasChapters = True }
-  return result
-
-section :: GenParser Char ParserState Block
-section = try $ do
-  char '\\'
-  subs <- many (try (string "sub"))
-  base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
-  st <- getState
-  let lev = if stateHasChapters st
-               then length subs + base + 1
-               else length subs + base
-  headerWithLevel lev
+rawLaTeXEnvironment :: GenParser Char ParserState String
+rawLaTeXEnvironment = try $ do
+  controlSeq "begin"
+  name <- braced
+  let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+  addBegin <$> (withRaw (env name blocks) >>= applyMacros' . snd)
 
-headerWithLevel :: Int -> GenParser Char ParserState Block
-headerWithLevel lev = try $ do
-  spaces
-  optional (char '*')
-  spaces
-  optional $ bracketedText '[' ']' -- alt title
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = do
+  (res, raw) <- withRaw inlineCommand
+  if res == mempty
+     then return (Str "")
+     else RawInline "latex" <$> (applyMacros' raw)
+
+environments :: M.Map String (LP Blocks)
+environments = M.fromList
+  [ ("document", env "document" blocks)
+  , ("letter", env "letter" blocks)
+  , ("center", env "center" blocks)
+  , ("tabular", env "tabular" simpTable)
+  , ("quote", blockQuote <$> env "quote" blocks)
+  , ("quotation", blockQuote <$> env "quotation" blocks)
+  , ("itemize", bulletList <$> listenv "itemize" (many item))
+  , ("description", definitionList <$> listenv "description" (many descItem))
+  , ("enumerate", ordered_list)
+  , ("code", failUnlessLHS *>
+      (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code"))
+  , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+  , ("Verbatim", codeBlock <$> (verbEnv "Verbatim"))
+  , ("lstlisting", codeBlock <$> (verbEnv "listlisting"))
+  , ("displaymath", mathEnv Nothing "displaymath")
+  , ("equation", mathEnv Nothing "equation")
+  , ("equation*", mathEnv Nothing "equation*")
+  , ("gather", mathEnv (Just "gathered") "gather")
+  , ("gather*", mathEnv (Just "gathered") "gather*")
+  , ("multiline", mathEnv (Just "gathered") "multiline")
+  , ("multiline*", mathEnv (Just "gathered") "multiline*")
+  , ("eqnarray", mathEnv (Just "aligned*") "eqnarray")
+  , ("eqnarray*", mathEnv (Just "aligned*") "eqnarray*")
+  , ("align", mathEnv (Just "aligned*") "align")
+  , ("align*", mathEnv (Just "aligned*") "align*")
+  , ("alignat", mathEnv (Just "aligned*") "alignat")
+  , ("alignat*", mathEnv (Just "aligned*") "alignat*")
+  ]
+
+item :: LP Blocks
+item = blocks *> controlSeq "item" *> optional opt *> blocks
+
+loose_item :: LP Blocks
+loose_item = do
+  ctx <- stateParserContext `fmap` getState
+  if ctx == ListItemState
+     then mzero
+     else return mempty
+
+descItem :: LP (Inlines, [Blocks])
+descItem = do
+  blocks -- skip blocks before item
+  controlSeq "item"
+  optional sp
+  ils <- opt
+  bs <- blocks
+  return (ils, [bs])
+
+env :: String -> LP a -> LP a
+env name p = p <* (controlSeq "end" *> braced >>= guard . (== name))
+
+listenv :: String -> LP a -> LP a
+listenv name p = try $ do
+  oldCtx <- stateParserContext `fmap` getState
+  updateState $ \st -> st{ stateParserContext = ListItemState }
+  res <- env name p
+  updateState $ \st -> st{ stateParserContext = oldCtx }
+  return res
+
+mathEnv :: Maybe String -> String -> LP Blocks
+mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+   where inner x = case innerEnv of
+                      Nothing -> x
+                      Just y  -> "\\begin{" ++ y ++ "}\n" ++ x ++
+                                    "\\end{" ++ y ++ "}"
+
+verbEnv :: String -> LP String
+verbEnv name = do
+  optional opt
+  optional blankline
+  let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
+  res <- manyTill anyChar endEnv
+  return $ stripTrailingNewlines res
+
+ordered_list :: LP Blocks
+ordered_list = do
+  optional sp
+  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+                              try $ char '[' *> anyOrderedListMarker <* char ']'
   spaces
-  char '{'
-  title' <- manyTill inline (char '}')
+  optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
   spaces
-  return $ Header lev (normalizeSpaces title')
+  start <- option 1 $ try $ do controlSeq "setcounter"
+                               grouped (string "enum" *> many1 (oneOf "iv"))
+                               optional sp
+                               num <- grouped (many1 digit)
+                               spaces
+                               return $ (read num + 1 :: Int)
+  bs <- listenv "enumerate" (many item)
+  return $ orderedListWith (start, style, delim) bs
+
+paragraph :: LP Blocks
+paragraph = do
+  x <- mconcat <$> many1 inline
+  if x == mempty
+     then return mempty
+     else return $ para $ trimInlines x
+
+preamble :: LP Blocks
+preamble = mempty <$> manyTill preambleBlock beginDoc
+  where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+        preambleBlock =  (mempty <$ comment)
+                     <|> (mempty <$ sp)
+                     <|> (mempty <$ blanklines)
+                     <|> (mempty <$ macro)
+                     <|> blockCommand
+                     <|> (mempty <$ anyControlSeq)
+                     <|> (mempty <$ braced)
+                     <|> (mempty <$ anyChar)
+
+-------
+
+-- citations
+
+addPrefix :: Inlines -> [Citation] -> [Citation]
+addPrefix p (k:ks)   = k {citationPrefix = toList p ++ citationPrefix k} : ks
+addPrefix _ _ = []
 
---
--- hrule block
---
+addSuffix :: Inlines -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ toList s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: LP [Citation]
+simpleCiteArgs = try $ do
+  first  <- optionMaybe opt
+  second <- optionMaybe opt
+  char '{'
+  keys <- manyTill citationLabel (char '}')
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
 
-hrule :: GenParser Char st Block
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", 
-                       "\\newpage" ] >> spaces >> return HorizontalRule
+citationLabel :: LP String
+citationLabel  = trim <$>
+  (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
 
--- tables
+cites :: CitationMode -> Bool -> LP [Citation]
+cites mode multi = try $ do
+  cits <- if multi
+             then many1 simpleCiteArgs
+             else count 1 simpleCiteArgs
+  let (c:cs) = concat cits
+  return $ case mode of
+        AuthorInText   -> c {citationMode = mode} : cs
+        _              -> map (\a -> a {citationMode = mode}) (c:cs)
 
-simpleTable :: GenParser Char ParserState Block
-simpleTable = try $ do
-  string "\\begin"
-  spaces
-  string "{tabular}"
-  spaces
-  aligns <- parseAligns
-  let cols = length aligns
-  optional hline
-  header' <- option [] $ parseTableHeader cols
-  rows <- many (parseTableRow cols >>~ optional hline)
-  spaces
-  end "tabular"
-  spaces
-  let header'' = if null header'
-                    then replicate cols []
-                    else header'
-  return $ Table [] aligns (replicate cols 0) header'' rows
+citation :: CitationMode -> Bool -> LP Inlines
+citation mode multi = (flip cite mempty) <$> cites mode multi
+
+complexNatbibCitation :: CitationMode -> LP Inlines
+complexNatbibCitation mode = try $ do
+  let ils = (trimInlines . mconcat) <$>
+              many (notFollowedBy (oneOf "\\};") >> inline)
+  let parseOne = try $ do
+                   skipSpaces
+                   pref  <- ils
+                   cit' <- inline -- expect a citation
+                   let citlist = toList cit'
+                   cits' <- case citlist of
+                                 [Cite cs _] -> return cs
+                                 _           -> mzero
+                   suff  <- ils
+                   skipSpaces
+                   optional $ char ';'
+                   return $ addPrefix pref $ addSuffix suff $ cits'
+  (c:cits) <- grouped parseOne
+  return $ cite (c{ citationMode = mode }:cits) mempty
 
-hline :: GenParser Char st ()
-hline = try $ spaces >> string "\\hline" >> return ()
+-- tables
 
-parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns :: LP [Alignment]
 parseAligns = try $ do
   char '{'
   optional $ char '|'
   let cAlign = char 'c' >> return AlignCenter
   let lAlign = char 'l' >> return AlignLeft
   let rAlign = char 'r' >> return AlignRight
-  let alignChar = cAlign <|> lAlign <|> rAlign
+  let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
   aligns' <- sepEndBy alignChar (optional $ char '|')
+  spaces
   char '}'
   spaces
   return aligns'
 
-parseTableHeader :: Int   -- ^ number of columns
-                 -> GenParser Char ParserState [TableCell]
-parseTableHeader cols = try $ do
-  cells' <- parseTableRow cols
-  hline
-  return cells'
+hline :: LP ()
+hline = () <$ (try $ spaces >> controlSeq "hline")
 
 parseTableRow :: Int  -- ^ number of columns
-              -> GenParser Char ParserState [TableCell]
+              -> LP [Blocks]
 parseTableRow cols = try $ do
-  let tableCellInline = notFollowedBy (char '&' <|>
-                          (try $ char '\\' >> char '\\')) >> inline
-  cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
-             (many tableCellInline)) (char '&')
+  let amp = try $ spaces *> string "&"
+  let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline
+  cells' <- sepBy (spaces *> ((plain . trimInlines . mconcat) <$>
+              many tableCellInline)) amp
   guard $ length cells' == cols
   spaces
-  (try $ string "\\\\" >> spaces) <|>
-    (lookAhead (end "tabular") >> return ())
+  try $ controlSeq "\\" <|> lookAhead (try $ controlSeq "end" >> string "{tabular}")
   return cells'
 
---
--- code blocks
---
-
-codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
--- Note:  Verbatim is from fancyvrb.
-
-codeBlockWith :: String -> GenParser Char st Block
-codeBlockWith env = try $ do
-  string "\\begin"
-  spaces                      -- don't use begin function because it
-  string $ "{" ++ env ++ "}"  -- gobbles whitespace; we want to gobble
-  optional blanklines         -- blank lines, but not leading space
-  contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
-  spaces
-  let classes = if env == "code" then ["haskell"] else []
-  return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
-  failUnlessLHS
-  (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
-  return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= 
-             return . BlockQuote
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem :: GenParser Char ParserState ([Inline], [Block])
-listItem = try $ do
-  ("item", _, args) <- command
-  spaces
-  state <- getState
-  let oldParserContext = stateParserContext state
-  updateState (\s -> s {stateParserContext = ListItemState})
-  blocks <- many block
-  updateState (\s -> s {stateParserContext = oldParserContext})
-  opt <- case args of
-           ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> 
-                       parseFromString (many inline) $ tail $ init x
-           _        -> return []
-  return (opt, blocks)
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
-  string "\\begin"
-  spaces
-  string "{enumerate}"
-  spaces
-  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
-                              try $ do failIfStrict
-                                       char '['
-                                       res <- anyOrderedListMarker
-                                       char ']'
-                                       return res
-  spaces
-  option "" $ try $ do string "\\setlength{\\itemindent}"
-                       char '{'
-                       manyTill anyChar (char '}')
-  spaces
-  start <- option 1 $ try $ do failIfStrict
-                               string "\\setcounter{enum"
-                               many1 (oneOf "iv")
-                               string "}{"
-                               num <- many1 digit
-                               char '}' 
-                               spaces
-                               return $ (read num) + 1
-  items <- many listItem
-  end "enumerate"
-  spaces
-  return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
-  begin "itemize"
-  items <- many listItem
-  end "itemize"
-  spaces
-  return (BulletList $ map snd items)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
-  begin "description"
-  items <- many listItem
-  end "description"
-  spaces
-  return $ DefinitionList $ map (\(t,d) -> (t,[d])) items
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = do
-  res <- many1 inline
-  spaces
-  return $ if null (filter (`notElem` [Str "", Space]) res)
-              then Null
-              else Para $ normalizeSpaces res
-
---
--- title authors date
---
-
-bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, subtitle, authors, date ]
-
-maketitle :: GenParser Char st Block
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title :: GenParser Char ParserState Block
-title = try $ do
-  string "\\title{"
-  tit <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateTitle = tit })
-  return Null
-
-subtitle :: GenParser Char ParserState Block
-subtitle = try $ do
-  string "\\subtitle{"
-  tit <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateTitle = stateTitle state ++
-                                   Str ":" : LineBreak : tit })
-  return Null
-
-authors :: GenParser Char ParserState Block
-authors = try $ do
-  string "\\author{"
-  let andsep = try $ string "\\and" >> notFollowedBy letter >>
-                     spaces >> return '&'
-  raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
-  let authors' = map normalizeSpaces raw
-  char '}'
-  spaces
-  updateState (\s -> s { stateAuthors = authors' })
-  return Null
-
-date :: GenParser Char ParserState Block
-date = try $ do
-  string "\\date{"
-  date' <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateDate = normalizeSpaces date' })
-  return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock :: GenParser Char ParserState Block
-itemBlock = try $ do
-  ("item", _, args) <- command
-  state <- getState
-  if stateParserContext state == ListItemState
-     then fail "item should be handled by list block"
-     else if null args 
-             then return Null
-             else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX 
---
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = do
-  contents <- rawLaTeXEnvironment'
-  spaces
-  return $ RawBlock "latex" contents
-
--- | Parse any LaTeX environment and return a string containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment' :: GenParser Char st String 
-rawLaTeXEnvironment' = try $ do
-  string "\\begin"
-  spaces
-  char '{'
-  name <- many1 letter
-  star <- option "" (string "*") -- for starred variants
-  let name' = name ++ star
-  char '}'
-  args <- option [] commandArgs
-  let argStr = concat args
-  contents <- manyTill (choice [ (many1 (noneOf "\\")), 
-                                 rawLaTeXEnvironment',
-                                 string "\\" ]) 
-                       (end name')
-  return $ "\\begin{" ++ name' ++ "}" ++ argStr ++ 
-                 concat contents ++ "\\end{" ++ name' ++ "}"
-
-unknownEnvironment :: GenParser Char ParserState Block
-unknownEnvironment = try $ do
-  state <- getState
-  result <- if stateParseRaw state -- check whether we should include raw TeX 
-               then rawLaTeXEnvironment -- if so, get whole raw environment
-               else anyEnvironment      -- otherwise just the contents
-  return result
-
--- \ignore{} is used conventionally in literate haskell for definitions
--- that are to be processed by the compiler but not printed.
-ignore :: GenParser Char ParserState Block
-ignore = try $ do
-  ("ignore", _, _) <- command
-  spaces
-  return Null
-
-demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
-demacro (n,st,args) = try $ do
-  let raw = "\\" ++ n ++ st ++ concat args
-  s' <- applyMacros' raw
-  if raw == s'
-     then return $ RawInline "latex" raw
-     else do
-       inp <- getInput
-       setInput $ s' ++ inp
-       return $ Str ""
-
-unknownCommand :: GenParser Char ParserState Block
-unknownCommand = try $ do
-  spaces
-  notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] >>
-                   notFollowedBy letter
-  state <- getState
-  when (stateParserContext state == ListItemState) $
-     notFollowedBy' (string "\\item")
-  if stateParseRaw state
-     then command >>= demacro >>= return . Plain . (:[])
-     else do
-        (name, _, args) <- command
-        spaces
-        unless (name `elem` commandsToIgnore) $ do
-        -- put arguments back in input to be parsed
-          inp <- getInput
-          setInput $ intercalate " " args ++ inp
-        return Null
-
-commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
-
-skipChar :: GenParser Char ParserState Block
-skipChar = do
-  satisfy (/='\\') <|>
-    (notFollowedBy' (try $
-                     string "\\begin" >> spaces >> string "{document}") >>
-     anyChar)
-  spaces
-  return Null
-
-commentBlock :: GenParser Char st Block
-commentBlock = many1 (comment >> spaces) >> return Null
-
--- 
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline =  choice [ str
-                 , endline
-                 , whitespace
-                 , quoted
-                 , apostrophe
-                 , strong
-                 , math
-                 , ellipses
-                 , emDash
-                 , enDash
-                 , hyphen
-                 , emph
-                 , strikeout
-                 , superscript
-                 , subscript
-                 , code
-                 , url
-                 , link
-                 , image
-                 , footnote
-                 , linebreak
-                 , accentedChar
-                 , nonbreakingSpace
-                 , cite
-                 , specialChar
-                 , ensureMath
-                 , rawLaTeXInline'
-                 , escapedChar
-                 , emptyGroup
-                 , unescapedChar
-                 , comment
-                 ] <?> "inline"
-
-
--- latex comment
-comment :: GenParser Char st Inline
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
-
-accentedChar :: GenParser Char st Inline
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar :: GenParser Char st Inline
-normalAccentedChar = try $ do
-  char '\\'
-  accent <- oneOf "'`^\"~"
-  character <- (try $ char '{' >> letter >>~ char '}') <|> letter
-  let table = fromMaybe [] $ lookup character accentTable 
-  let result = case lookup accent table of
-                 Just num  -> chr num
-                 Nothing   -> '?'
-  return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable :: [(Char, [(Char, Int)])]
-accentTable = 
-  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
-    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
-    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
-    ('N', [('~', 209)]),
-    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
-    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
-    ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
-    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
-    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
-    ('n', [('~', 241)]),
-    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
-    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
-                               oslash, pound, euro, copyright, sect ]
-
-ccedil :: GenParser Char st Inline
-ccedil = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["cc", "cC"]
-  notFollowedBy letter
-  let num = if letter' == "cc" then 231 else 199
-  return $ Str [chr num]
-
-aring :: GenParser Char st Inline
-aring = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["aa", "AA"]
-  notFollowedBy letter
-  let num = if letter' == "aa" then 229 else 197
-  return $ Str [chr num]
-
-iuml :: GenParser Char st Inline
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> 
-       return (Str [chr 239])
-
-szlig :: GenParser Char st Inline
-szlig = try (string "\\ss") >> notFollowedBy letter >> return (Str [chr 223])
-
-oslash :: GenParser Char st Inline
-oslash = try $ do
-  char '\\'
-  letter' <- choice [char 'o', char 'O']
-  notFollowedBy letter
-  let num = if letter' == 'o' then 248 else 216
-  return $ Str [chr num]
-
-lslash :: GenParser Char st Inline
-lslash = try $ do
-  cmd <- oneOfStrings ["{\\L}","{\\l}"]
-       <|> (oneOfStrings ["\\L ","\\l "] >>~ notFollowedBy letter)
-  return $ if 'l' `elem` cmd
-              then Str "\x142"
-              else Str "\x141"
-
-aelig :: GenParser Char st Inline
-aelig = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["ae", "AE"]
-  notFollowedBy letter
-  let num = if letter' == "ae" then 230 else 198
-  return $ Str [chr num]
-
-pound :: GenParser Char st Inline
-pound = try (string "\\pounds" >> notFollowedBy letter) >> return (Str [chr 163])
-
-euro :: GenParser Char st Inline
-euro = try (string "\\euro" >> notFollowedBy letter) >> return (Str [chr 8364])
-
-copyright :: GenParser Char st Inline
-copyright = try (string "\\copyright" >> notFollowedBy letter) >> return (Str [chr 169])
-
-sect :: GenParser Char st Inline
-sect = try (string "\\S" >> notFollowedBy letter) >> return (Str [chr 167])
-
-escapedChar :: GenParser Char st Inline
-escapedChar = do
-  result <- escaped (oneOf specialChars)
-  return $ if result == '\n' then Str " " else Str [result]
+parseTableHeader :: Int   -- ^ number of columns
+                 -> LP [Blocks]
+parseTableHeader cols = try $ parseTableRow cols <* hline
 
-emptyGroup :: GenParser Char st Inline
-emptyGroup = try $ do
-  char '{'
+simpTable :: LP Blocks
+simpTable = try $ do
   spaces
-  char '}'
-  return $ Str ""
-
--- nonescaped special characters
-unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
-
-specialChar :: GenParser Char st Inline
-specialChar = choice [ spacer, interwordSpace, sentenceEnd,
-                       backslash, tilde, caret,
-                       bar, lt, gt, doubleQuote ]
-
-spacer :: GenParser Char st Inline
-spacer = try (string "\\,") >> return (Str "")
-
-sentenceEnd :: GenParser Char st Inline
-sentenceEnd = try (string "\\@") >> return (Str "")
-
-interwordSpace :: GenParser Char st Inline
-interwordSpace = try (string "\\ ") >> return (Str "\160")
-
-backslash :: GenParser Char st Inline
-backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
-
-tilde :: GenParser Char st Inline
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret :: GenParser Char st Inline
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar :: GenParser Char st Inline
-bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
-
-lt :: GenParser Char st Inline
-lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
-
-gt :: GenParser Char st Inline
-gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
-
-doubleQuote :: GenParser Char st Inline
-doubleQuote = char '"' >> return (Str "\"")
-
-code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> code3 <|> lhsInlineCode
-
-code1 :: GenParser Char st Inline
-code1 = try $ do 
-  string "\\verb"
-  marker <- anyChar
-  result <- manyTill anyChar (char marker)
-  return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-code2 :: GenParser Char st Inline
-code2 = try $ do
-  string "\\texttt{"
-  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
-  return $ Code nullAttr result
-
-code3 :: GenParser Char st Inline
-code3 = try $ do 
-  string "\\lstinline"
-  marker <- anyChar
-  result <- manyTill anyChar (char marker)
-  return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-lhsInlineCode :: GenParser Char ParserState Inline
-lhsInlineCode = try $ do
-  failUnlessLHS
-  char '|'
-  result <- manyTill (noneOf "|\n") (char '|')
-  return $ Code ("",["haskell"],[]) result
-
-emph :: GenParser Char ParserState Inline
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
-             manyTill inline (char '}') >>= return . Emph
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
-                  return . Strikeout
-
-superscript :: GenParser Char ParserState Inline
-superscript = try $ string "\\textsuperscript{" >> 
-                    manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript :: GenParser Char ParserState Inline
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
-                  return . Subscript
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = char '\'' >> return (Str "\x2019")
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
-               return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
-               return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart :: GenParser Char st Char
-singleQuoteStart = char '`'
-
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart :: CharParser st String
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd :: CharParser st String
-doubleQuoteEnd = try $ string "''"
-
-ellipses :: GenParser Char st Inline
-ellipses = try $ do
-  char '\\'
-  optional $ char 'l'
-  string "dots"
-  optional $ try $ string "{}"
-  return (Str "…")
-
-enDash :: GenParser Char st Inline
-enDash = try (string "--") >> return (Str "-")
-
-emDash :: GenParser Char st Inline
-emDash = try (string "---") >> return (Str "—")
-
-hyphen :: GenParser Char st Inline
-hyphen = char '-' >> return (Str "-")
-
-strong :: GenParser Char ParserState Inline
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
-         return . Strong
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 (oneOf " \t") >> return Space
-
-nonbreakingSpace :: GenParser Char st Inline
-nonbreakingSpace = char '~' >> return (Str "\160")
-
--- hard line break
-linebreak :: GenParser Char st Inline
-linebreak = try $ do
-  string "\\\\"
-  optional $ bracketedText '[' ']'  -- e.g. \\[10pt]
+  aligns <- parseAligns
+  let cols = length aligns
+  optional hline
+  header' <- option [] $ parseTableHeader cols
+  rows <- many (parseTableRow cols <* optional hline)
   spaces
-  return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline :: GenParser Char st Inline
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math :: GenParser Char ParserState Inline
-math =   (math3 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
-     <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
-     <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
-     <?> "math"
-
-math1 :: GenParser Char st String 
-math1 = try $ char '$' >> manyTill anyChar (char '$')
-
-math2 :: GenParser Char st String
-math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
-
-math3 :: GenParser Char st String 
-math3 = try $ char '$' >> math1 >>~ char '$'
-
-math4 :: GenParser Char st String
-math4 = try $ do
-  name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
-           begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
-             begin "multline" <|> begin "multline*"
-  manyTill anyChar (end name)
-
-math5 :: GenParser Char st String
-math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
-
-math6 :: GenParser Char st String
-math6 = try $ do
-  name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
-           begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
-             begin "split" <|> begin "aligned" <|> begin "alignedat"
-  res <- manyTill anyChar (end name)
-  return $ filter (/= '&') res  -- remove alignment codes
-
-ensureMath :: GenParser Char st Inline
-ensureMath = try $ do
-  (n, _, args) <- command
-  guard $ n == "ensuremath" && not (null args)
-  return $ Math InlineMath $ tail $ init $ head args
-
---
--- links and images
---
-
-url :: GenParser Char ParserState Inline
-url = try $ do
-  string "\\url"
-  url' <- charsInBalanced '{' '}' anyChar
-  return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
-
-link :: GenParser Char ParserState Inline
-link = try $ do
-  string "\\href{"
-  url' <- manyTill anyChar (char '}')
-  char '{'
-  label' <- manyTill inline (char '}') 
-  return $ Link (normalizeSpaces label') (escapeURI url', "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
-  ("includegraphics", _, args) <- command
-  let args' = filter isArg args -- filter out options
-  let (src,tit) = case args' of
-                       []    -> ("", "")
-                       (x:_) -> (stripFirstAndLast x, "")
-  return $ Image [Str "image"] (escapeURI src, tit)
-
-footnote :: GenParser Char ParserState Inline
-footnote = try $ do
-  (name, _, (contents:[])) <- command
-  if ((name == "footnote") || (name == "thanks"))
-     then string ""
-     else fail "not a footnote or thanks command"
-  let contents' = stripFirstAndLast contents
-  -- parse the extracted block, which may contain various block elements:
-  rest <- getInput
-  setInput $ contents'
-  blocks <- parseBlocks
-  setInput rest
-  return $ Note blocks
-
--- | citations
-cite :: GenParser Char ParserState Inline
-cite = simpleCite <|> complexNatbibCites
-
-simpleCiteArgs :: GenParser Char ParserState [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ (char '[') >> manyTill inline (char ']')
-  second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
-  char '{'
-  keys <- many1Till citationLabel (char '}')
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> ([], s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> ([], [])
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-
-simpleCite :: GenParser Char ParserState Inline
-simpleCite = try $ do
-  char '\\'
-  let biblatex     = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
-                     ++ ["footcitetext"]
-      normal       = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
-                     ++ biblatex
-      supress      = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
-      intext       = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
-      mintext      = ["textcites"]
-      mnormal      = map (++ "s") biblatex
-      cmdend       = notFollowedBy (letter <|> char '*')
-      capit []     = []
-      capit (x:xs) = toUpper x : xs
-      addUpper xs  = xs ++ map capit xs
-      toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
-  (mode, multi) <-  toparser normal  (NormalCitation, False)
-                <|> toparser supress (SuppressAuthor, False)
-                <|> toparser intext  (AuthorInText  , False)
-                <|> toparser mnormal (NormalCitation, True )
-                <|> toparser mintext (AuthorInText  , True )
-  cits <- if multi then
-            many1 simpleCiteArgs
-          else
-            simpleCiteArgs >>= \c -> return [c]
-  let (c:cs) = concat cits
-      cits'  = case mode of
-        AuthorInText   -> c {citationMode = mode} : cs
-        _              -> map (\a -> a {citationMode = mode}) (c:cs)
-  return $ Cite cits' []
-
-complexNatbibCites :: GenParser Char ParserState Inline
-complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
-
-complexNatbibTextual :: GenParser Char ParserState Inline
-complexNatbibTextual = try $ do
-  string "\\citeauthor{"
-  manyTill (noneOf "}") (char '}')
-  skipSpaces
-  Cite (c:cs) _ <- complexNatbibParenthetical
-  return $ Cite (c {citationMode = AuthorInText} : cs) []
-
-
-complexNatbibParenthetical :: GenParser Char ParserState Inline
-complexNatbibParenthetical = try $ do
-  string "\\citetext{"
-  cits <- many1Till parseOne (char '}')
-  return $ Cite (concat cits) []
-  where
-    parseOne = do
-                 skipSpaces
-                 pref           <- many (notFollowedBy (oneOf "\\}") >> inline)
-                 (Cite cites _) <- simpleCite
-                 suff           <- many (notFollowedBy (oneOf "\\};") >> inline)
-                 skipSpaces
-                 optional $ char ';'
-                 return $ addPrefix pref $ addSuffix suff $ cites
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks)   = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) = let k = last ks
-                        in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-citationLabel :: GenParser Char ParserState String
-citationLabel  = do
-  res <- many1 $ noneOf ",}"
-  optional $ char ','
-  return $ removeLeadingTrailingSpace res
-
--- | Parse any LaTeX inline command and return it in a raw TeX inline element.
-rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
-  notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
-                                 "\\section"]
-  rawLaTeXInline
+  let header'' = if null header'
+                    then replicate cols mempty
+                    else header'
+  return $ table mempty (zip aligns (repeat 0)) header'' rows
 
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
-  state <- getState
-  if stateParseRaw state
-     then command >>= demacro
-     else do
-        (name,st,args) <- command
-        x <- demacro (name,st,args)
-        unless (x == Str "" || name `elem` commandsToIgnore) $ do
-          inp <- getInput
-          setInput $ intercalate " " args ++ inp
-        return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c78727715..8da0f7c16 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
 import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
@@ -729,8 +729,8 @@ rawVerbatimBlock = try $ do
 rawTeXBlock :: GenParser Char ParserState Block
 rawTeXBlock = do
   failIfStrict
-  result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
-          <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+  result <- liftM (RawBlock "latex") rawLaTeXBlock
+          <|> liftM (RawBlock "context") rawConTeXtEnvironment
   spaces
   return result
 
@@ -933,8 +933,8 @@ inlineParsers = [ whitespace
                 , inlineNote  -- after superscript because of ^[link](/foo)^
                 , autoLink
                 , rawHtmlInline
-                , rawLaTeXInline'
                 , escapedChar
+                , rawLaTeXInline'
                 , exampleRef
                 , smartPunctuation inline
                 , charRef
@@ -977,8 +977,7 @@ symbol :: GenParser Char ParserState Inline
 symbol = do 
   result <- noneOf "<\\\n\t "
          <|> try (do lookAhead $ char '\\'
-                     notFollowedBy' $ rawLaTeXEnvironment'
-                                   <|> rawConTeXtEnvironment'
+                     notFollowedBy' rawTeXBlock
                      char '\\')
   return $ Str [result]
 
@@ -1246,18 +1245,16 @@ inlineNote = try $ do
 rawLaTeXInline' :: GenParser Char ParserState Inline
 rawLaTeXInline' = try $ do
   failIfStrict
-  lookAhead $ char '\\'
-  notFollowedBy' $ rawLaTeXEnvironment'
-                <|> rawConTeXtEnvironment'
+  lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
   RawInline _ s <- rawLaTeXInline
   return $ RawInline "tex" s  -- "tex" because it might be context or latex
 
-rawConTeXtEnvironment' :: GenParser Char st String
-rawConTeXtEnvironment' = try $ do
+rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
                <|> (many1 letter)
-  contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
+  contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
                        (try $ string "\\stop" >> string completion)
   return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
author	John MacFarlane <fiddlosopher@gmail.com>	2012-01-29 23:54:00 -0800
committer	John MacFarlane <fiddlosopher@gmail.com>	2012-02-04 09:56:43 -0800
commit	75485c2f112cdc2e1f95f871d01cc356510166ae (patch)
tree	2d495f048bd887403db41418c14445b54af1b00c /src/Text/Pandoc/Readers
parent	23ca68a5c41ef68397ed2217e31bf6e720fb0534 (diff)
download	pandoc-75485c2f112cdc2e1f95f871d01cc356510166ae.tar.gz