{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
{-
Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Readers.LaTeX
   Copyright   : Copyright (C) 2006-2015 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of LaTeX to 'Pandoc' document.
-}
module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                   rawLaTeXInline,
                                   rawLaTeXBlock,
                                   inlineCommand,
                                 ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
                                   mathDisplay, mathInline)
import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad
import Text.Pandoc.Builder
import Control.Applicative ((<|>), many, optional)
import Data.Maybe (fromMaybe, maybeToList)
import System.FilePath (replaceExtension, takeExtension, addExtension)
import Data.List (intercalate)
import qualified Data.Map as M
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, report,
          readFileFromDirs)

-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
          => ReaderOptions -- ^ Reader options
          -> String        -- ^ String to parse (assumes @'\n'@ line endings)
          -> m Pandoc
readLaTeX opts ltx = do
  parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
  case parsed of
    Right result -> return result
    Left e -> throwError e

parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
  bs <- blocks
  eof
  st <- getState
  let meta = stateMeta st
  let (Pandoc _ bs') = doc bs
  return $ Pandoc meta bs'

type LP m = ParserT String ParserState m

anyControlSeq :: PandocMonad m => LP m String
anyControlSeq = do
  char '\\'
  next <- option '\n' anyChar
  case next of
       '\n'           -> return ""
       c | isLetter c -> (c:) <$> (many letter <* optional sp)
         | otherwise  -> return [c]

controlSeq :: PandocMonad m => String -> LP m String
controlSeq name = try $ do
  char '\\'
  case name of
        ""   -> mzero
        [c] | not (isLetter c) -> string [c]
        cs   -> string cs <* notFollowedBy letter <* optional sp
  return name

dimenarg :: PandocMonad m => LP m String
dimenarg = try $ do
  ch  <- option "" $ string "="
  num <- many1 digit
  dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
  return $ ch ++ num ++ dim

sp :: PandocMonad m => LP m ()
sp = whitespace <|> endline

whitespace :: PandocMonad m => LP m ()
whitespace = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')

endline :: PandocMonad m => LP m ()
endline = try (newline >> lookAhead anyChar >> notFollowedBy blankline)

isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'

tildeEscape :: PandocMonad m => LP m Char
tildeEscape = try $ do
  string "^^"
  c <- satisfy (\x -> x >= '\0' && x <= '\128')
  d <- if isLowerHex c
          then option "" $ count 1 (satisfy isLowerHex)
          else return ""
  if null d
     then case ord c of
           x | x >= 64 && x <= 127 -> return $ chr (x - 64)
             | otherwise           -> return $ chr (x + 64)
     else return $ chr $ read ('0':'x':c:d)

comment :: PandocMonad m => LP m ()
comment = do
  char '%'
  skipMany (satisfy (/='\n'))
  optional newline
  return ()

bgroup :: PandocMonad m => LP m ()
bgroup = try $ do
  skipMany (spaceChar <|> try (newline <* notFollowedBy blankline))
  () <$ char '{'
     <|> () <$ controlSeq "bgroup"
     <|> () <$ controlSeq "begingroup"

egroup :: PandocMonad m => LP m ()
egroup = () <$ char '}'
     <|> () <$ controlSeq "egroup"
     <|> () <$ controlSeq "endgroup"

grouped :: PandocMonad m => Monoid a => LP m a -> LP m a
grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup)

braced :: PandocMonad m => LP m String
braced = bgroup *> (concat <$> manyTill
         (  many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
        <|> try (string "\\}")
        <|> try (string "\\{")
        <|> try (string "\\\\")
        <|> ((\x -> "{" ++ x ++ "}") <$> braced)
        <|> count 1 anyChar
         ) egroup)

bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))

mathDisplay :: PandocMonad m => LP m String -> LP m Inlines
mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)

mathInline :: PandocMonad m => LP m String -> LP m Inlines
mathInline p = math <$> (try p >>= applyMacros')

mathChars :: PandocMonad m => LP m String
mathChars =
  concat <$> many (escapedChar
               <|> (snd <$> withRaw braced)
               <|> many1 (satisfy isOrdChar))
   where escapedChar = try $ do char '\\'
                                c <- anyChar
                                return ['\\',c]
         isOrdChar '$' = False
         isOrdChar '{' = False
         isOrdChar '}' = False
         isOrdChar '\\' = False
         isOrdChar _ = True

quoted' :: PandocMonad m => (Inlines -> Inlines) -> LP m String -> LP m () -> LP m Inlines
quoted' f starter ender = do
  startchs <- starter
  smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
  if smart
     then do
       ils <- many (notFollowedBy ender >> inline)
       (ender >> return (f (mconcat ils))) <|>
            (<> mconcat ils) <$>
                    lit (case startchs of
                              "``"  -> "“"
                              "`"   -> "‘"
                              _     -> startchs)
     else lit startchs

doubleQuote :: PandocMonad m => LP m Inlines
doubleQuote = do
  quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
   <|> quoted' doubleQuoted (string "“")        (void $ char '”')
   -- the following is used by babel for localized quotes:
   <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
   <|> quoted' doubleQuoted (string "\"")       (void $ char '"')

singleQuote :: PandocMonad m => LP m Inlines
singleQuote = do
  smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
  if smart
     then quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
      <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
     else str <$> many1 (oneOf "`\'‘’")

inline :: PandocMonad m => LP m Inlines
inline = (mempty <$ comment)
     <|> (space  <$ whitespace)
     <|> (softbreak <$ endline)
     <|> inlineText
     <|> inlineCommand
     <|> inlineEnvironment
     <|> inlineGroup
     <|> (char '-' *> option (str "-")
           (char '-' *> option (str "–") (str "—" <$ char '-')))
     <|> doubleQuote
     <|> singleQuote
     <|> (str "”" <$ try (string "''"))
     <|> (str "”" <$ char '”')
     <|> (str "’" <$ char '\'')
     <|> (str "’" <$ char '’')
     <|> (str "\160" <$ char '~')
     <|> mathDisplay (string "$$" *> mathChars <* string "$$")
     <|> mathInline  (char '$' *> mathChars <* char '$')
     <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
     <|> (str . (:[]) <$> tildeEscape)
     <|> (do res <- oneOf "#&~^'`\"[]"
             pos <- getPosition
             report $ ParsingUnescaped [res] pos
             return $ str [res])

inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)

inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
  ils <- grouped inline
  if isNull ils
     then return mempty
     else return $ spanWith nullAttr ils
          -- we need the span so we can detitlecase bibtex entries;
          -- we need to know when something is {C}apitalized

block :: PandocMonad m => LP m Blocks
block = (mempty <$ comment)
    <|> (mempty <$ ((spaceChar <|> newline) *> spaces))
    <|> environment
    <|> include
    <|> macro
    <|> blockCommand
    <|> paragraph
    <|> grouped block
    <|> (mempty <$ char '&')  -- loose & in table environment


blocks :: PandocMonad m => LP m Blocks
blocks = mconcat <$> many block

getRawCommand :: PandocMonad m => String -> LP m String
getRawCommand name' = do
  rawargs <- withRaw (many (try (optional sp *> opt)) *>
                      option "" (try (optional sp *> dimenarg)) *>
                      many braced)
  return $ '\\' : name' ++ snd rawargs

lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
lookupListDefault d = (fromMaybe d .) . lookupList
  where
  lookupList l m = msum $ map (`M.lookup` m) l

blockCommand :: PandocMonad m => LP m Blocks
blockCommand = try $ do
  name <- anyControlSeq
  guard $ name /= "begin" && name /= "end"
  star <- option "" (string "*" <* optional sp)
  let name' = name ++ star
  let raw = do
        rawcommand <- getRawCommand name'
        transformed <- applyMacros' rawcommand
        guard $ transformed /= rawcommand
        notFollowedBy $ parseFromString inlines transformed
        parseFromString blocks transformed
  lookupListDefault raw [name',name] blockCommands

inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"

-- eat an optional argument and one or more arguments in braces
ignoreInlines :: PandocMonad m => String -> (String, LP m Inlines)
ignoreInlines name = (name, p)
  where
    p = do oa <- optargs
           let rawCommand = '\\':name ++ oa
           let doraw = guardRaw >> return (rawInline "latex" rawCommand)
           doraw <|> ignore rawCommand

guardRaw :: PandocMonad m => LP m ()
guardRaw = getOption readerExtensions >>= guard . extensionEnabled Ext_raw_tex

optargs :: PandocMonad m => LP m String
optargs = snd <$> withRaw (skipopts *> skipMany (try $ optional sp *> braced))

ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
ignore raw = do
  pos <- getPosition
  report $ SkippedContent raw pos
  return mempty

ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks)
ignoreBlocks name = (name, p)
  where
    p = do oa <- optargs
           let rawCommand = '\\':name ++ oa
           let doraw = guardRaw >> return (rawBlock "latex" rawCommand)
           doraw <|> ignore rawCommand

blockCommands :: PandocMonad m => M.Map String (LP m Blocks)
blockCommands = M.fromList $
  [ ("par", mempty <$ skipopts)
  , ("parbox",  braced >> grouped blocks)
  , ("title", mempty <$ (skipopts *>
                          (grouped inline >>= addMeta "title")
                      <|> (grouped block >>= addMeta "title")))
  , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
  , ("author", mempty <$ (skipopts *> authors))
  -- -- in letter class, temp. store address & sig as title, author
  , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
  , ("signature", mempty <$ (skipopts *> authors))
  , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
  -- sectioning
  , ("chapter", updateState (\s -> s{ stateHasChapters = True })
                      *> section nullAttr 0)
  , ("chapter*", updateState (\s -> s{ stateHasChapters = True })
                      *> section ("",["unnumbered"],[]) 0)
  , ("section", section nullAttr 1)
  , ("section*", section ("",["unnumbered"],[]) 1)
  , ("subsection", section nullAttr 2)
  , ("subsection*", section ("",["unnumbered"],[]) 2)
  , ("subsubsection", section nullAttr 3)
  , ("subsubsection*", section ("",["unnumbered"],[]) 3)
  , ("paragraph", section nullAttr 4)
  , ("paragraph*", section ("",["unnumbered"],[]) 4)
  , ("subparagraph", section nullAttr 5)
  , ("subparagraph*", section ("",["unnumbered"],[]) 5)
  -- beamer slides
  , ("frametitle", section nullAttr 3)
  , ("framesubtitle", section nullAttr 4)
  -- letters
  , ("opening", (para . trimInlines) <$> (skipopts *> tok))
  , ("closing", skipopts *> closing)
  --
  , ("hrule", pure horizontalRule)
  , ("strut", pure mempty)
  , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
  , ("item", skipopts *> looseItem)
  , ("documentclass", skipopts *> braced *> preamble)
  , ("centerline", (para . trimInlines) <$> (skipopts *> tok))
  , ("caption", skipopts *> setCaption)
  , ("bibliography", mempty <$ (skipopts *> braced >>=
                                addMeta "bibliography" . splitBibs))
  , ("addbibresource", mempty <$ (skipopts *> braced >>=
                                addMeta "bibliography" . splitBibs))
  -- includes
  , ("lstinputlisting", inputListing)
  ] ++ map ignoreBlocks
  -- these commands will be ignored unless --parse-raw is specified,
  -- in which case they will appear as raw latex blocks
  [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
    -- newcommand, etc. should be parsed by macro, but we need this
    -- here so these aren't parsed as inline commands to ignore
  , "special", "pdfannot", "pdfstringdef"
  , "bibliographystyle"
  , "maketitle", "makeindex", "makeglossary"
  , "addcontentsline", "addtocontents", "addtocounter"
     -- \ignore{} is used conventionally in literate haskell for definitions
     -- that are to be processed by the compiler but not printed.
  , "ignore"
  , "hyperdef"
  , "markboth", "markright", "markleft"
  , "newpage"
  ]

addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
addMeta field val = updateState $ \st ->
  st{ stateMeta = addMetaField field val $ stateMeta st }

splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')

setCaption :: PandocMonad m => LP m Blocks
setCaption = do
  ils <- tok
  mblabel <- option Nothing $
               try $ spaces' >> controlSeq "label" >> (Just <$> tok)
  let ils' = case mblabel of
                  Just lab -> ils <> spanWith
                                ("",[],[("data-label", stringify lab)]) mempty
                  Nothing  -> ils
  updateState $ \st -> st{ stateCaption = Just ils' }
  return mempty

resetCaption :: PandocMonad m => LP m ()
resetCaption = updateState $ \st -> st{ stateCaption = Nothing }

authors :: PandocMonad m => LP m ()
authors = try $ do
  char '{'
  let oneAuthor = mconcat <$>
       many1 (notFollowedBy' (controlSeq "and") >>
               (inline <|> mempty <$ blockCommand))
               -- skip e.g. \vspace{10pt}
  auths <- sepBy oneAuthor (controlSeq "and")
  char '}'
  addMeta "author" (map trimInlines auths)

section :: PandocMonad m => Attr -> Int -> LP m Blocks
section (ident, classes, kvs) lvl = do
  hasChapters <- stateHasChapters `fmap` getState
  let lvl' = if hasChapters then lvl + 1 else lvl
  skipopts
  contents <- grouped inline
  lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
  attr' <- registerHeader (lab, classes, kvs) contents
  return $ headerWith attr' lvl' contents

inlineCommand :: PandocMonad m => LP m Inlines
inlineCommand = try $ do
  (name, raw') <- withRaw anyControlSeq
  guard $ name /= "begin" && name /= "end"
  guard $ not $ isBlockCommand name
  exts <- getOption readerExtensions
  star <- option "" (string "*")
  let name' = name ++ star
  let raw = do
        rawargs <- withRaw
               (skipangles *> skipopts *> option "" dimenarg *> many braced)
        let rawcommand = raw' ++ star ++ snd rawargs
        transformed <- applyMacros' rawcommand
        if transformed /= rawcommand
           then parseFromString inlines transformed
           else if extensionEnabled Ext_raw_tex exts
                   then return $ rawInline "latex" rawcommand
                   else ignore rawcommand
  (lookupListDefault mzero [name',name] inlineCommands <*
      optional (try (string "{}")))
    <|> raw

unlessParseRaw :: PandocMonad m => LP m ()
unlessParseRaw = getOption readerExtensions >>=
                 guard . not . extensionEnabled Ext_raw_tex

isBlockCommand :: String -> Bool
isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks))


inlineEnvironments :: PandocMonad m => M.Map String (LP m Inlines)
inlineEnvironments = M.fromList
  [ ("displaymath", mathEnv id Nothing "displaymath")
  , ("math", math <$> verbEnv "math")
  , ("equation", mathEnv id Nothing "equation")
  , ("equation*", mathEnv id Nothing "equation*")
  , ("gather", mathEnv id (Just "gathered") "gather")
  , ("gather*", mathEnv id (Just "gathered") "gather*")
  , ("multline", mathEnv id (Just "gathered") "multline")
  , ("multline*", mathEnv id (Just "gathered") "multline*")
  , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
  , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
  , ("align", mathEnv id (Just "aligned") "align")
  , ("align*", mathEnv id (Just "aligned") "align*")
  , ("alignat", mathEnv id (Just "aligned") "alignat")
  , ("alignat*", mathEnv id (Just "aligned") "alignat*")
  ]

inlineCommands :: PandocMonad m => M.Map String (LP m Inlines)
inlineCommands = M.fromList $
  [ ("emph", extractSpaces emph <$> tok)
  , ("textit", extractSpaces emph <$> tok)
  , ("textsl", extractSpaces emph <$> tok)
  , ("textsc", extractSpaces smallcaps <$> tok)
  , ("sout", extractSpaces strikeout <$> tok)
  , ("textsuperscript", extractSpaces superscript <$> tok)
  , ("textsubscript", extractSpaces subscript <$> tok)
  , ("textbackslash", lit "\\")
  , ("backslash", lit "\\")
  , ("slash", lit "/")
  , ("textbf", extractSpaces strong <$> tok)
  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
  , ("ldots", lit "…")
  , ("dots", lit "…")
  , ("mdots", lit "…")
  , ("sim", lit "~")
  , ("label", unlessParseRaw >> (inBrackets <$> tok))
  , ("ref", unlessParseRaw >> (inBrackets <$> tok))
  , ("noindent", unlessParseRaw >> ignore "noindent")
  , ("textgreek", tok)
  , ("sep", lit ",")
  , ("cref", unlessParseRaw >> (inBrackets <$> tok))  -- from cleveref.sty
  , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
  , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
  , ("ensuremath", mathInline braced)
  , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
  , ("P", lit "¶")
  , ("S", lit "§")
  , ("$", lit "$")
  , ("%", lit "%")
  , ("&", lit "&")
  , ("#", lit "#")
  , ("_", lit "_")
  , ("{", lit "{")
  , ("}", lit "}")
  -- old TeX commands
  , ("em", extractSpaces emph <$> inlines)
  , ("it", extractSpaces emph <$> inlines)
  , ("sl", extractSpaces emph <$> inlines)
  , ("bf", extractSpaces strong <$> inlines)
  , ("rm", inlines)
  , ("itshape", extractSpaces emph <$> inlines)
  , ("slshape", extractSpaces emph <$> inlines)
  , ("scshape", extractSpaces smallcaps <$> inlines)
  , ("bfseries", extractSpaces strong <$> inlines)
  , ("/", pure mempty) -- italic correction
  , ("aa", lit "å")
  , ("AA", lit "Å")
  , ("ss", lit "ß")
  , ("o", lit "ø")
  , ("O", lit "Ø")
  , ("L", lit "Ł")
  , ("l", lit "ł")
  , ("ae", lit "æ")
  , ("AE", lit "Æ")
  , ("oe", lit "œ")
  , ("OE", lit "Œ")
  , ("pounds", lit "£")
  , ("euro", lit "€")
  , ("copyright", lit "©")
  , ("textasciicircum", lit "^")
  , ("textasciitilde", lit "~")
  , ("H", try $ tok >>= accent hungarumlaut)
  , ("`", option (str "`") $ try $ tok >>= accent grave)
  , ("'", option (str "'") $ try $ tok >>= accent acute)
  , ("^", option (str "^") $ try $ tok >>= accent circ)
  , ("~", option (str "~") $ try $ tok >>= accent tilde)
  , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
  , (".", option (str ".") $ try $ tok >>= accent dot)
  , ("=", option (str "=") $ try $ tok >>= accent macron)
  , ("c", option (str "c") $ try $ tok >>= accent cedilla)
  , ("v", option (str "v") $ try $ tok >>= accent hacek)
  , ("u", option (str "u") $ try $ tok >>= accent breve)
  , ("i", lit "i")
  , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
  , (",", pure mempty)
  , ("@", pure mempty)
  , (" ", lit "\160")
  , ("ps", pure $ str "PS." <> space)
  , ("TeX", lit "TeX")
  , ("LaTeX", lit "LaTeX")
  , ("bar", lit "|")
  , ("textless", lit "<")
  , ("textgreater", lit ">")
  , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
  , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
  , ("verb", doverb)
  , ("lstinline", skipopts *> doverb)
  , ("Verb", doverb)
  , ("texttt", (code . stringify . toList) <$> tok)
  , ("url", (unescapeURL <$> braced) >>= \url ->
       pure (link url "" (str url)))
  , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
       tok >>= \lab ->
         pure (link url "" lab))
  , ("includegraphics", do options <- option [] keyvals
                           src <- unescapeURL . removeDoubleQuotes <$> braced
                           mkImage options src)
  , ("enquote", enquote)
  , ("cite", citation "cite" NormalCitation False)
  , ("Cite", citation "Cite" NormalCitation False)
  , ("citep", citation "citep" NormalCitation False)
  , ("citep*", citation "citep*" NormalCitation False)
  , ("citeal", citation "citeal" NormalCitation False)
  , ("citealp", citation "citealp" NormalCitation False)
  , ("citealp*", citation "citealp*" NormalCitation False)
  , ("autocite", citation "autocite" NormalCitation False)
  , ("smartcite", citation "smartcite" NormalCitation False)
  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
  , ("parencite", citation "parencite" NormalCitation False)
  , ("supercite", citation "supercite" NormalCitation False)
  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
  , ("citeyear", citation "citeyear" SuppressAuthor False)
  , ("autocite*", citation "autocite*" SuppressAuthor False)
  , ("cite*", citation "cite*" SuppressAuthor False)
  , ("parencite*", citation "parencite*" SuppressAuthor False)
  , ("textcite", citation "textcite" AuthorInText False)
  , ("citet", citation "citet" AuthorInText False)
  , ("citet*", citation "citet*" AuthorInText False)
  , ("citealt", citation "citealt" AuthorInText False)
  , ("citealt*", citation "citealt*" AuthorInText False)
  , ("textcites", citation "textcites" AuthorInText True)
  , ("cites", citation "cites" NormalCitation True)
  , ("autocites", citation "autocites" NormalCitation True)
  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
  , ("parencites", citation "parencites" NormalCitation True)
  , ("supercites", citation "supercites" NormalCitation True)
  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
  , ("Autocite", citation "Autocite" NormalCitation False)
  , ("Smartcite", citation "Smartcite" NormalCitation False)
  , ("Footcite", citation "Footcite" NormalCitation False)
  , ("Parencite", citation "Parencite" NormalCitation False)
  , ("Supercite", citation "Supercite" NormalCitation False)
  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
  , ("Cite*", citation "Cite*" SuppressAuthor False)
  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
  , ("Textcite", citation "Textcite" AuthorInText False)
  , ("Textcites", citation "Textcites" AuthorInText True)
  , ("Cites", citation "Cites" NormalCitation True)
  , ("Autocites", citation "Autocites" NormalCitation True)
  , ("Footcites", citation "Footcites" NormalCitation True)
  , ("Parencites", citation "Parencites" NormalCitation True)
  , ("Supercites", citation "Supercites" NormalCitation True)
  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
  , ("citetext", complexNatbibCitation NormalCitation)
  , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
                        complexNatbibCitation AuthorInText)
                   <|> citation "citeauthor" AuthorInText False)
  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
                          addMeta "nocite"))
  ] ++ map ignoreInlines
  -- these commands will be ignored unless --parse-raw is specified,
  -- in which case they will appear as raw latex blocks:
  [ "index" ]

mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
mkImage options src = do
   let replaceTextwidth (k,v) = case numUnit v of
                                  Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
                                  _ -> (k, v)
   let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
   let attr = ("",[], kvs)
   let alt = str "image"
   case takeExtension src of
        "" -> do
              defaultExt <- getOption readerDefaultImageExtension
              return $ imageWith attr (addExtension src defaultExt) "" alt
        _  -> return $ imageWith attr src "" alt

inNote :: Inlines -> Inlines
inNote ils =
  note $ para $ ils <> str "."

unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
  where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""

enquote :: PandocMonad m => LP m Inlines
enquote = do
  skipopts
  context <- stateQuoteContext <$> getState
  if context == InDoubleQuote
     then singleQuoted <$> withQuoteContext InSingleQuote tok
     else doubleQuoted <$> withQuoteContext InDoubleQuote tok

doverb :: PandocMonad m => LP m Inlines
doverb = do
  marker <- anyChar
  code <$> manyTill (satisfy (/='\n')) (char marker)

doLHSverb :: PandocMonad m => LP m Inlines
doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')

lit :: String -> LP m Inlines
lit = pure . str

accent :: (Char -> String) -> Inlines -> LP m Inlines
accent f ils =
  case toList ils of
       (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
       []                -> mzero
       _                 -> return ils

grave :: Char -> String
grave 'A' = "À"
grave 'E' = "È"
grave 'I' = "Ì"
grave 'O' = "Ò"
grave 'U' = "Ù"
grave 'a' = "à"
grave 'e' = "è"
grave 'i' = "ì"
grave 'o' = "ò"
grave 'u' = "ù"
grave c   = [c]

acute :: Char -> String
acute 'A' = "Á"
acute 'E' = "É"
acute 'I' = "Í"
acute 'O' = "Ó"
acute 'U' = "Ú"
acute 'Y' = "Ý"
acute 'a' = "á"
acute 'e' = "é"
acute 'i' = "í"
acute 'o' = "ó"
acute 'u' = "ú"
acute 'y' = "ý"
acute 'C' = "Ć"
acute 'c' = "ć"
acute 'L' = "Ĺ"
acute 'l' = "ĺ"
acute 'N' = "Ń"
acute 'n' = "ń"
acute 'R' = "Ŕ"
acute 'r' = "ŕ"
acute 'S' = "Ś"
acute 's' = "ś"
acute 'Z' = "Ź"
acute 'z' = "ź"
acute c   = [c]

circ :: Char -> String
circ 'A' = "Â"
circ 'E' = "Ê"
circ 'I' = "Î"
circ 'O' = "Ô"
circ 'U' = "Û"
circ 'a' = "â"
circ 'e' = "ê"
circ 'i' = "î"
circ 'o' = "ô"
circ 'u' = "û"
circ 'C' = "Ĉ"
circ 'c' = "ĉ"
circ 'G' = "Ĝ"
circ 'g' = "ĝ"
circ 'H' = "Ĥ"
circ 'h' = "ĥ"
circ 'J' = "Ĵ"
circ 'j' = "ĵ"
circ 'S' = "Ŝ"
circ 's' = "ŝ"
circ 'W' = "Ŵ"
circ 'w' = "ŵ"
circ 'Y' = "Ŷ"
circ 'y' = "ŷ"
circ c   = [c]

tilde :: Char -> String
tilde 'A' = "Ã"
tilde 'a' = "ã"
tilde 'O' = "Õ"
tilde 'o' = "õ"
tilde 'I' = "Ĩ"
tilde 'i' = "ĩ"
tilde 'U' = "Ũ"
tilde 'u' = "ũ"
tilde 'N' = "Ñ"
tilde 'n' = "ñ"
tilde c   = [c]

umlaut :: Char -> String
umlaut 'A' = "Ä"
umlaut 'E' = "Ë"
umlaut 'I' = "Ï"
umlaut 'O' = "Ö"
umlaut 'U' = "Ü"
umlaut 'a' = "ä"
umlaut 'e' = "ë"
umlaut 'i' = "ï"
umlaut 'o' = "ö"
umlaut 'u' = "ü"
umlaut c   = [c]

hungarumlaut :: Char -> String
hungarumlaut 'A' = "A̋"
hungarumlaut 'E' = "E̋"
hungarumlaut 'I' = "I̋"
hungarumlaut 'O' = "Ő"
hungarumlaut 'U' = "Ű"
hungarumlaut 'Y' = "ӳ"
hungarumlaut 'a' = "a̋"
hungarumlaut 'e' = "e̋"
hungarumlaut 'i' = "i̋"
hungarumlaut 'o' = "ő"
hungarumlaut 'u' = "ű"
hungarumlaut 'y' = "ӳ"
hungarumlaut c   = [c]

dot :: Char -> String
dot 'C' = "Ċ"
dot 'c' = "ċ"
dot 'E' = "Ė"
dot 'e' = "ė"
dot 'G' = "Ġ"
dot 'g' = "ġ"
dot 'I' = "İ"
dot 'Z' = "Ż"
dot 'z' = "ż"
dot c   = [c]

macron :: Char -> String
macron 'A' = "Ā"
macron 'E' = "Ē"
macron 'I' = "Ī"
macron 'O' = "Ō"
macron 'U' = "Ū"
macron 'a' = "ā"
macron 'e' = "ē"
macron 'i' = "ī"
macron 'o' = "ō"
macron 'u' = "ū"
macron c   = [c]

cedilla :: Char -> String
cedilla 'c' = "ç"
cedilla 'C' = "Ç"
cedilla 's' = "ş"
cedilla 'S' = "Ş"
cedilla 't' = "ţ"
cedilla 'T' = "Ţ"
cedilla 'e' = "ȩ"
cedilla 'E' = "Ȩ"
cedilla 'h' = "ḩ"
cedilla 'H' = "Ḩ"
cedilla 'o' = "o̧"
cedilla 'O' = "O̧"
cedilla c   = [c]

hacek :: Char -> String
hacek 'A' = "Ǎ"
hacek 'a' = "ǎ"
hacek 'C' = "Č"
hacek 'c' = "č"
hacek 'D' = "Ď"
hacek 'd' = "ď"
hacek 'E' = "Ě"
hacek 'e' = "ě"
hacek 'G' = "Ǧ"
hacek 'g' = "ǧ"
hacek 'H' = "Ȟ"
hacek 'h' = "ȟ"
hacek 'I' = "Ǐ"
hacek 'i' = "ǐ"
hacek 'j' = "ǰ"
hacek 'K' = "Ǩ"
hacek 'k' = "ǩ"
hacek 'L' = "Ľ"
hacek 'l' = "ľ"
hacek 'N' = "Ň"
hacek 'n' = "ň"
hacek 'O' = "Ǒ"
hacek 'o' = "ǒ"
hacek 'R' = "Ř"
hacek 'r' = "ř"
hacek 'S' = "Š"
hacek 's' = "š"
hacek 'T' = "Ť"
hacek 't' = "ť"
hacek 'U' = "Ǔ"
hacek 'u' = "ǔ"
hacek 'Z' = "Ž"
hacek 'z' = "ž"
hacek c   = [c]

breve :: Char -> String
breve 'A' = "Ă"
breve 'a' = "ă"
breve 'E' = "Ĕ"
breve 'e' = "ĕ"
breve 'G' = "Ğ"
breve 'g' = "ğ"
breve 'I' = "Ĭ"
breve 'i' = "ĭ"
breve 'O' = "Ŏ"
breve 'o' = "ŏ"
breve 'U' = "Ŭ"
breve 'u' = "ŭ"
breve c   = [c]

tok :: PandocMonad m => LP m Inlines
tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar

opt :: PandocMonad m => LP m Inlines
opt = bracketed inline

rawopt :: PandocMonad m => LP m String
rawopt = do
  contents <- bracketed (many1 (noneOf "[]") <|> try (string "\\]") <|>
                   try (string "\\[") <|> rawopt)
  optional sp
  return $ "[" ++ contents ++ "]"

skipopts :: PandocMonad m => LP m ()
skipopts = skipMany rawopt

-- opts in angle brackets are used in beamer
rawangle :: PandocMonad m => LP m ()
rawangle = try $ do
  char '<'
  skipMany (noneOf ">")
  char '>'
  return ()

skipangles :: PandocMonad m => LP m ()
skipangles = skipMany rawangle

inlineText :: PandocMonad m => LP m Inlines
inlineText = str <$> many1 inlineChar

inlineChar :: PandocMonad m => LP m Char
inlineChar = noneOf "\\$%&~#{}^'`\"‘’“”-[] \t\n"

environment :: PandocMonad m => LP m Blocks
environment = do
  controlSeq "begin"
  name <- braced
  M.findWithDefault mzero name environments
    <|> rawEnv name

inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
  controlSeq "begin"
  name <- braced
  M.findWithDefault mzero name inlineEnvironments

rawEnv :: PandocMonad m => String -> LP m Blocks
rawEnv name = do
  exts <- getOption readerExtensions
  let parseRaw = extensionEnabled Ext_raw_tex exts
  rawOptions <- mconcat <$> many rawopt
  let beginCommand = "\\begin{" ++ name ++ "}" ++ rawOptions
  unless parseRaw $ do
    pos1 <- getPosition
    report $ SkippedContent beginCommand pos1
  (bs, raw) <- withRaw $ env name blocks
  raw' <- applyMacros' raw
  if parseRaw
     then return $ rawBlock "latex" $ beginCommand ++ raw'
     else do
       pos2 <- getPosition
       report $ SkippedContent ("\\end{" ++ name ++ "}") pos2
       return bs

----

braced' :: PandocMonad m => LP m String
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')

maybeAddExtension :: String -> FilePath -> FilePath
maybeAddExtension ext fp =
  if null (takeExtension fp)
     then addExtension fp ext
     else fp

include :: PandocMonad m => LP m Blocks
include = do
  fs' <- try $ do
              char '\\'
              name <- try (string "include")
                  <|> try (string "input")
                  <|> string "usepackage"
              -- skip options
              skipMany $ try $ char '[' *> manyTill anyChar (char ']')
              fs <- (map trim . splitBy (==',')) <$> braced'
              return $ if name == "usepackage"
                          then map (maybeAddExtension ".sty") fs
                          else map (maybeAddExtension ".tex") fs
  dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
  mconcat <$> mapM (insertIncludedFile blocks dirs) fs'

inputListing :: PandocMonad m => LP m Blocks
inputListing = do
  pos <- getPosition
  options <- option [] keyvals
  f <- filter (/='"') <$> braced
  dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
  mbCode <- readFileFromDirs dirs f
  codeLines <- case mbCode of
                      Just s -> return $ lines s
                      Nothing -> do
                        report $ CouldNotLoadIncludeFile f pos
                        return []
  let (ident,classes,kvs) = parseListingsOptions options
  let language = case lookup "language" options >>= fromListingsLanguage of
                      Just l -> [l]
                      Nothing -> take 1 $ languagesByExtension (takeExtension f)
  let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
  let lastline = fromMaybe (length codeLines) $
                       lookup "lastline" options >>= safeRead
  let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
                       drop (firstline - 1) codeLines
  return $ codeBlockWith (ident,ordNub (classes ++ language),kvs) codeContents

parseListingsOptions :: [(String, String)] -> Attr
parseListingsOptions options =
  let kvs = [ (if k == "firstnumber"
                  then "startFrom"
                  else k, v) | (k,v) <- options ]
      classes = [ "numberLines" |
                  lookup "numbers" options == Just "left" ]
             ++ maybeToList (lookup "language" options
                     >>= fromListingsLanguage)
  in  (fromMaybe "" (lookup "label" options), classes, kvs)

----

keyval :: PandocMonad m => LP m (String, String)
keyval = try $ do
  key <- many1 alphaNum
  val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
  skipMany spaceChar
  optional (char ',')
  skipMany spaceChar
  return (key, val)


keyvals :: PandocMonad m => LP m [(String, String)]
keyvals = try $ char '[' *> manyTill keyval (char ']')

alltt :: PandocMonad m => String -> LP m Blocks
alltt t = walk strToCode <$> parseFromString blocks
  (substitute " " "\\ " $ substitute "%" "\\%" $
   intercalate "\\\\\n" $ lines t)
  where strToCode (Str s) = Code nullAttr s
        strToCode x       = x

rawLaTeXBlock :: PandocMonad m => LP m String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))

rawLaTeXInline :: PandocMonad m => LP m Inline
rawLaTeXInline = do
  raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
  RawInline "latex" <$> applyMacros' raw

addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
  where go (Image attr alt (src,tit)) = do
          mbcapt <- stateCaption <$> getState
          return $ case mbcapt of
               Just ils -> Image attr (toList ils) (src, "fig:")
               Nothing  -> Image attr alt (src,tit)
        go x = return x

addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
  where go (Table c als ws hs rs) = do
          mbcapt <- stateCaption <$> getState
          return $ case mbcapt of
               Just ils -> Table (toList ils) als ws hs rs
               Nothing  -> Table c als ws hs rs
        go x = return x

environments :: PandocMonad m => M.Map String (LP m Blocks)
environments = M.fromList
  [ ("document", env "document" blocks <* skipMany anyChar)
  , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
  , ("letter", env "letter" letterContents)
  , ("minipage", env "minipage" $
         skipopts *> spaces' *> optional braced *> spaces' *> blocks)
  , ("figure", env "figure" $
         resetCaption *> skipopts *> blocks >>= addImageCaption)
  , ("center", env "center" blocks)
  , ("longtable",  env "longtable" $
         resetCaption *> simpTable False >>= addTableCaption)
  , ("table",  env "table" $
         resetCaption *> skipopts *> blocks >>= addTableCaption)
  , ("tabular*", env "tabular" $ simpTable True)
  , ("tabular", env "tabular"  $ simpTable False)
  , ("quote", blockQuote <$> env "quote" blocks)
  , ("quotation", blockQuote <$> env "quotation" blocks)
  , ("verse", blockQuote <$> env "verse" blocks)
  , ("itemize", bulletList <$> listenv "itemize" (many item))
  , ("description", definitionList <$> listenv "description" (many descItem))
  , ("enumerate", orderedList')
  , ("alltt", alltt =<< verbEnv "alltt")
  , ("code", guardEnabled Ext_literate_haskell *>
      (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
        verbEnv "code"))
  , ("comment", mempty <$ verbEnv "comment")
  , ("verbatim", codeBlock <$> verbEnv "verbatim")
  , ("Verbatim", fancyverbEnv "Verbatim")
  , ("BVerbatim", fancyverbEnv "BVerbatim")
  , ("lstlisting", do attr <- parseListingsOptions <$> option [] keyvals
                      codeBlockWith attr <$> verbEnv "lstlisting")
  , ("minted",     do options <- option [] keyvals
                      lang <- grouped (many1 $ satisfy (/='}'))
                      let kvs = [ (if k == "firstnumber"
                                      then "startFrom"
                                      else k, v) | (k,v) <- options ]
                      let classes = [ lang | not (null lang) ] ++
                                    [ "numberLines" |
                                      lookup "linenos" options == Just "true" ]
                      let attr = ("",classes,kvs)
                      codeBlockWith attr <$> verbEnv "minted")
  , ("obeylines", parseFromString
                  (para . trimInlines . mconcat <$> many inline) =<<
                  intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
  , ("displaymath", mathEnv para Nothing "displaymath")
  , ("equation", mathEnv para Nothing "equation")
  , ("equation*", mathEnv para Nothing "equation*")
  , ("gather", mathEnv para (Just "gathered") "gather")
  , ("gather*", mathEnv para (Just "gathered") "gather*")
  , ("multline", mathEnv para (Just "gathered") "multline")
  , ("multline*", mathEnv para (Just "gathered") "multline*")
  , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
  , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
  , ("align", mathEnv para (Just "aligned") "align")
  , ("align*", mathEnv para (Just "aligned") "align*")
  , ("alignat", mathEnv para (Just "aligned") "alignat")
  , ("alignat*", mathEnv para (Just "aligned") "alignat*")
  ]

letterContents :: PandocMonad m => LP m Blocks
letterContents = do
  bs <- blocks
  st <- getState
  -- add signature (author) and address (title)
  let addr = case lookupMeta "address" (stateMeta st) of
                  Just (MetaBlocks [Plain xs]) ->
                     para $ trimInlines $ fromList xs
                  _ -> mempty
  return $ addr <> bs -- sig added by \closing

closing :: PandocMonad m => LP m Blocks
closing = do
  contents <- tok
  st <- getState
  let extractInlines (MetaBlocks [Plain ys]) = ys
      extractInlines (MetaBlocks [Para ys ]) = ys
      extractInlines _          = []
  let sigs = case lookupMeta "author" (stateMeta st) of
                  Just (MetaList xs) ->
                    para $ trimInlines $ fromList $
                      intercalate [LineBreak] $ map extractInlines xs
                  _ -> mempty
  return $ para (trimInlines contents) <> sigs

item :: PandocMonad m => LP m Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks

looseItem :: PandocMonad m => LP m Blocks
looseItem = do
  ctx <- stateParserContext `fmap` getState
  if ctx == ListItemState
     then mzero
     else return mempty

descItem :: PandocMonad m => LP m (Inlines, [Blocks])
descItem = do
  blocks -- skip blocks before item
  controlSeq "item"
  optional sp
  ils <- opt
  bs <- blocks
  return (ils, [bs])

env :: PandocMonad m => String -> LP m a -> LP m a
env name p = p <*
  (try (controlSeq "end" *> braced >>= guard . (== name))
    <?> ("\\end{" ++ name ++ "}"))

listenv :: PandocMonad m => String -> LP m a -> LP m a
listenv name p = try $ do
  oldCtx <- stateParserContext `fmap` getState
  updateState $ \st -> st{ stateParserContext = ListItemState }
  res <- env name p
  updateState $ \st -> st{ stateParserContext = oldCtx }
  return res

mathEnv :: PandocMonad m => (Inlines -> a) -> Maybe String -> String -> LP m a
mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
   where inner x = case innerEnv of
                      Nothing -> x
                      Just y  -> "\\begin{" ++ y ++ "}\n" ++ x ++
                                    "\\end{" ++ y ++ "}"

verbEnv :: PandocMonad m => String -> LP m String
verbEnv name = do
  skipopts
  optional blankline
  let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
  res <- manyTill anyChar endEnv
  return $ stripTrailingNewlines res

fancyverbEnv :: PandocMonad m => String -> LP m Blocks
fancyverbEnv name = do
  options <- option [] keyvals
  let kvs = [ (if k == "firstnumber"
                  then "startFrom"
                  else k, v) | (k,v) <- options ]
  let classes = [ "numberLines" |
                  lookup "numbers" options == Just "left" ]
  let attr = ("",classes,kvs)
  codeBlockWith attr <$> verbEnv name

orderedList' :: PandocMonad m => LP m Blocks
orderedList' = do
  optional sp
  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
                              try $ char '[' *> anyOrderedListMarker <* char ']'
  spaces
  optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
  spaces
  start <- option 1 $ try $ do controlSeq "setcounter"
                               grouped (string "enum" *> many1 (oneOf "iv"))
                               optional sp
                               num <- grouped (many1 digit)
                               spaces
                               return (read num + 1 :: Int)
  bs <- listenv "enumerate" (many item)
  return $ orderedListWith (start, style, delim) bs

paragraph :: PandocMonad m => LP m Blocks
paragraph = do
  x <- trimInlines . mconcat <$> many1 inline
  if x == mempty
     then return mempty
     else return $ para x

preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
  where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
        preambleBlock =  void comment
                     <|> void sp
                     <|> void blanklines
                     <|> void include
                     <|> void macro
                     <|> void blockCommand
                     <|> void anyControlSeq
                     <|> void braced
                     <|> void anyChar

-------

-- citations

addPrefix :: [Inline] -> [Citation] -> [Citation]
addPrefix p (k:ks)   = k {citationPrefix = p ++ citationPrefix k} : ks
addPrefix _ _ = []

addSuffix :: [Inline] -> [Citation] -> [Citation]
addSuffix s ks@(_:_) =
  let k = last ks
  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
addSuffix _ _ = []

simpleCiteArgs :: PandocMonad m => LP m [Citation]
simpleCiteArgs = try $ do
  first  <- optionMaybe $ toList <$> opt
  second <- optionMaybe $ toList <$> opt
  char '{'
  optional sp
  keys <- manyTill citationLabel (char '}')
  let (pre, suf) = case (first  , second ) of
        (Just s , Nothing) -> (mempty, s )
        (Just s , Just t ) -> (s , t )
        _                  -> (mempty, mempty)
      conv k = Citation { citationId      = k
                        , citationPrefix  = []
                        , citationSuffix  = []
                        , citationMode    = NormalCitation
                        , citationHash    = 0
                        , citationNoteNum = 0
                        }
  return $ addPrefix pre $ addSuffix suf $ map conv keys

citationLabel :: PandocMonad m => LP m String
citationLabel  = optional sp *>
  (many1 (satisfy isBibtexKeyChar)
          <* optional sp
          <* optional (char ',')
          <* optional sp)
  where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]" :: String)

cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
cites mode multi = try $ do
  cits <- if multi
             then many1 simpleCiteArgs
             else count 1 simpleCiteArgs
  let cs = concat cits
  return $ case mode of
        AuthorInText -> case cs of
                             (c:rest) -> c {citationMode = mode} : rest
                             []       -> []
        _            -> map (\a -> a {citationMode = mode}) cs

citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
  (c,raw) <- withRaw $ cites mode multi
  return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)

complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
complexNatbibCitation mode = try $ do
  let ils = (toList . trimInlines . mconcat) <$>
              many (notFollowedBy (oneOf "\\};") >> inline)
  let parseOne = try $ do
                   skipSpaces
                   pref  <- ils
                   cit' <- inline -- expect a citation
                   let citlist = toList cit'
                   cits' <- case citlist of
                                 [Cite cs _] -> return cs
                                 _           -> mzero
                   suff  <- ils
                   skipSpaces
                   optional $ char ';'
                   return $ addPrefix pref $ addSuffix suff cits'
  (c:cits, raw) <- withRaw $ grouped parseOne
  return $ cite (c{ citationMode = mode }:cits)
           (rawInline "latex" $ "\\citetext" ++ raw)

-- tables

parseAligns :: PandocMonad m => LP m [(String, Alignment, String)]
parseAligns = try $ do
  char '{'
  let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
  maybeBar
  let cAlign = AlignCenter <$ char 'c'
  let lAlign = AlignLeft <$ char 'l'
  let rAlign = AlignRight <$ char 'r'
  let parAlign = AlignLeft <$ (char 'p' >> braced)
  let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign
  let alignPrefix = char '>' >> braced
  let alignSuffix = char '<' >> braced
  let alignSpec = do
        spaces
        pref <- option "" alignPrefix
        spaces
        ch <- alignChar
        spaces
        suff <- option "" alignSuffix
        return (pref, ch, suff)
  aligns' <- sepEndBy alignSpec maybeBar
  spaces
  char '}'
  spaces
  return $ aligns'

hline :: PandocMonad m => LP m ()
hline = try $ do
  spaces'
  controlSeq "hline" <|>
    -- booktabs rules:
    controlSeq "toprule" <|>
    controlSeq "bottomrule" <|>
    controlSeq "midrule" <|>
    controlSeq "endhead" <|>
    controlSeq "endfirsthead"
  spaces'
  optional $ bracketed (many1 (satisfy (/=']')))
  return ()

lbreak :: PandocMonad m => LP m ()
lbreak = () <$ try (spaces' *>
                    (controlSeq "\\" <|> controlSeq "tabularnewline") <*
                    spaces')

amp :: PandocMonad m => LP m ()
amp = () <$ try (spaces' *> char '&' <* spaces')

parseTableRow :: PandocMonad m
              => Int  -- ^ number of columns
              -> [String] -- ^ prefixes
              -> [String] -- ^ suffixes
              -> LP m [Blocks]
parseTableRow cols prefixes suffixes = try $ do
  let tableCellRaw = many (notFollowedBy
                       (amp <|> lbreak <|>
                         (() <$ try (string "\\end"))) >> anyChar)
  let minipage = try $ controlSeq "begin" *> string "{minipage}" *>
          env "minipage"
          (skipopts *> spaces' *> optional braced *> spaces' *> blocks)
  let tableCell = minipage <|>
            ((plain . trimInlines . mconcat) <$> many inline)
  rawcells <- sepBy1 tableCellRaw amp
  guard $ length rawcells == cols
  let rawcells' = zipWith3 (\c p s -> p ++ trim c ++ s)
                      rawcells prefixes suffixes
  cells' <- mapM (parseFromString tableCell) rawcells'
  let numcells = length cells'
  guard $ numcells <= cols && numcells >= 1
  guard $ cells' /= [mempty]
  -- note:  a & b in a three-column table leaves an empty 3rd cell:
  let cells'' = cells' ++ replicate (cols - numcells) mempty
  spaces'
  return cells''

spaces' :: PandocMonad m => LP m ()
spaces' = spaces *> skipMany (comment *> spaces)

simpTable :: PandocMonad m => Bool -> LP m Blocks
simpTable hasWidthParameter = try $ do
  when hasWidthParameter $ () <$ (spaces' >> tok)
  skipopts
  (prefixes, aligns, suffixes) <- unzip3 <$> parseAligns
  let cols = length aligns
  optional $ controlSeq "caption" *> skipopts *> setCaption
  optional lbreak
  spaces'
  skipMany hline
  spaces'
  header' <- option [] $ try (parseTableRow cols prefixes suffixes <*
                                   lbreak <* many1 hline)
  spaces'
  rows <- sepEndBy (parseTableRow cols prefixes suffixes)
                    (lbreak <* optional (skipMany hline))
  spaces'
  optional $ controlSeq "caption" *> skipopts *> setCaption
  optional lbreak
  spaces'
  let header'' = if null header'
                    then replicate cols mempty
                    else header'
  lookAhead $ controlSeq "end" -- make sure we're at end
  return $ table mempty (zip aligns (repeat 0)) header'' rows

removeDoubleQuotes :: String -> String
removeDoubleQuotes ('"':xs) =
  case reverse xs of
       '"':ys -> reverse ys
       _      -> '"':xs
removeDoubleQuotes xs = xs