65 files changed, 4123 insertions, 3180 deletions
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.BibTeX
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -23,41 +23,47 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index f0edcaa16..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -2,8 +2,8 @@
 
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-   Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Module      : Text.Pandoc.Readers.CSV
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index c1773eaab..411d64278 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,8 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -25,17 +26,66 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Options
 import Text.Pandoc.Error
+import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
+import Data.Typeable
+import Text.Pandoc.Parsing (runParserT, getInput,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
+import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMark opts s = do
-  let res = runIdentity $
-              commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s
-  case res of
-    Left err -> throwError $ PandocParsecError s err
-    Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
+readCommonMark opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          rest <- getInput
+                          return (meta, rest))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, rest) -> do
+        -- strip off metadata section and parse body
+        let body = concatMap sourceToToks (unSources rest)
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
+
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
+  | isEnabled Ext_sourcepos opts =
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+  | otherwise =
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+specFor :: (Monad m, Typeable m, Typeable a,
+            Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
+        => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks)
+specFor opts = foldr ($) defaultSyntaxSpec exts
  where
   exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++
          [ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++
@@ -62,5 +112,7 @@ readCommonMark opts s = do
            | isEnabled Ext_implicit_header_references opts ] ++
          [ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++
          [ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++
-         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ]
+         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++
+         [ (rebaseRelativePathsSpec <>)
+           | isEnabled Ext_rebase_relative_paths opts ]
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..c49b82ccf 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.DocBook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -12,23 +12,28 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
 import Data.Generics
 import Data.List (intersperse,elemIndex)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
 {-
 
@@ -92,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] chapterinfo - Meta-information for a Chapter
 [ ] citation - An inline bibliographic reference to another published work
 [ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
+[x] citerefentry - A citation to a reference page
 [ ] citetitle - The title of a cited work
 [ ] city - The name of a city in an address
 [x] classname - The name of a class, in the object-oriented programming sense
@@ -129,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] corpcredit - A corporation or organization credited in a document
 [ ] corpname - The name of a corporation
 [ ] country - The name of a country
+[x] danger - An admonition set off from the text indicating hazardous situation
 [ ] database - The name of a database, or part of a database
 [x] date - The date of publication or revision of a document
 [ ] dedication - A wrapper for the dedication section of a book
@@ -206,7 +212,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] info - A wrapper for information about a component or other block. (DocBook v5)
 [x] informalequation - A displayed mathematical equation without a title
 [x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
+[x] informalfigure - An untitled figure
 [ ] informaltable - A table without a title
 [ ] initializer - The initializer for a FieldSynopsis
 [x] inlineequation - A mathematical equation or expression occurring inline
@@ -535,24 +541,32 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  let sources = toSources inp
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)
 
 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@@ -565,32 +579,14 @@ getFigure e = do
   modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty }
   return res
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- convenience function
 named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
 
 --
 
@@ -605,16 +601,24 @@ addMetadataFromElement e = do
          Nothing -> return ()
          Just z  -> addMetaField "author" z
     addMetaField "subtitle" e
-    addMetaField "author" e
+    addAuthor e
     addMetaField "date" e
     addMetaField "release" e
     addMetaField "releaseinfo" e
     return mempty
-  where addMetaField fieldname elt =
-            case filterChildren (named fieldname) elt of
-                   []  -> return ()
-                   [z] -> getInlines z >>= addMeta fieldname
-                   zs  -> mapM getInlines zs >>= addMeta fieldname
+  where
+   addAuthor elt =
+     case filterChildren (named "author") elt of
+       [] -> return ()
+       [z] -> fromAuthor z >>= addMeta "author"
+       zs  -> mapM fromAuthor zs >>= addMeta "author"
+   fromAuthor elt =
+     mconcat . intersperse space <$> mapM getInlines (elChildren elt)
+   addMetaField fieldname elt =
+     case filterChildren (named fieldname) elt of
+       []  -> return ()
+       [z] -> getInlines z >>= addMeta fieldname
+       zs  -> mapM getInlines zs >>= addMeta fieldname
 
 addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
 addMeta field val = modify (setMeta field val)
@@ -627,7 +631,7 @@ isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blockTags
 isBlockElement _ = False
 
-blockTags :: [String]
+blockTags :: [Text]
 blockTags =
   [ "abstract"
   , "ackno"
@@ -669,6 +673,7 @@ blockTags =
   , "index"
   , "info"
   , "informalexample"
+  , "informalfigure"
   , "informaltable"
   , "itemizedlist"
   , "linegroup"
@@ -713,8 +718,8 @@ blockTags =
   , "variablelist"
   ] ++ admonitionTags
 
-admonitionTags :: [String]
-admonitionTags = ["important","caution","note","tip","warning"]
+admonitionTags :: [Text]
+admonitionTags = ["caution","danger","important","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
 trimNl :: Text -> Text
@@ -736,9 +741,9 @@ getMediaobject e = do
   figTitle <- gets dbFigureTitle
   ident <- gets dbFigureId
   (imageUrl, attr) <-
-    case filterChild (named "imageobject") e of
-      Nothing  -> return (mempty, nullAttr)
-      Just z   -> case filterChild (named "imagedata") z of
+    case filterElements (named "imageobject") e of
+      []  -> return (mempty, nullAttr)
+      (z:_) -> case filterChild (named "imagedata") z of
                     Nothing -> return (mempty, nullAttr)
                     Just i  -> let atVal a = attrValue a i
                                    w = case atVal "width" of
@@ -771,10 +776,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -829,7 +834,7 @@ parseBlock (Elem e) =
         "refsect2" -> sect 2
         "refsect3" -> sect 3
         "refsection" -> gets dbSectionLevel >>= sect . (+1)
-        l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+        l | l `elem` admonitionTags -> parseAdmonition l
         "area" -> skip
         "areaset" -> skip
         "areaspec" -> skip
@@ -855,6 +860,7 @@ parseBlock (Elem e) =
         "variablelist" -> definitionList <$> deflistitems
         "procedure" -> bulletList <$> steps
         "figure" -> getFigure e
+        "informalfigure" -> getFigure e
         "mediaobject" -> para <$> getMediaobject e
         "caption" -> skip
         "info" -> addMetadataFromElement e
@@ -890,7 +896,11 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          codeBlockWithLang = do
@@ -898,7 +908,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ T.pack $ strContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -952,17 +962,16 @@ parseBlock (Elem e) =
                             w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x ->
                                                      (x >= '0' && x <= '9')
-                                                      || x == '.') (T.pack w)
+                                                      || x == '.') w
                             if n > 0 then Just n else Nothing
-                      let numrows = case bodyrows of
-                                         [] -> 0
-                                         xs -> maximum $ map length xs
+                      let numrows = maybe 0 maximum $ nonEmpty
+                                                    $ map length bodyrows
                       let aligns = case colspecs of
                                      [] -> replicate numrows AlignDefault
                                      cs -> map toAlignment cs
                       let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                    || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                         Just d  -> case attrValue "table-width" d of
                                                      "" -> 1.0
                                                      w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1035,12 +1044,12 @@ parseMixed container conts = do
       x <- parseMixed container rs
       return $ p <> b <> x
 
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
 parseRow cn = do
   let isEntry x  = named "entry" x || named "td" x || named "th" x
   mapM (parseEntry cn) . filterChildren isEntry
 
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
 parseEntry cn el = do
   let colDistance sa ea = do
         let iStrt = elemIndex sa cn
@@ -1062,7 +1071,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = trimInlines . mconcat <$>
                  mapM parseInline (elContent e')
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -1071,16 +1080,16 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
-  return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+  return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "anchor" -> do
            return $ spanWith (attrValue "id" e, [], []) mempty
         "phrase" -> do
           let ident = attrValue "id" e
-          let classes = T.words $ attrValue "class" e
+          let classes = T.words $ attrValue "role" e
           if ident /= "" || classes /= []
             then innerInlines (spanWith (ident,classes,[]))
             else innerInlines id
@@ -1103,6 +1112,10 @@ parseInline (Elem e) =
         "segmentedlist" -> segmentedList
         "classname" -> codeWithLang
         "code" -> codeWithLang
+        "citerefentry" -> do
+             let title = maybe mempty strContent $ filterChild (named "refentrytitle") e
+             let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e
+             return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum)
         "filename" -> codeWithLang
         "envar" -> codeWithLang
         "literal" -> codeWithLang
@@ -1125,7 +1138,7 @@ parseInline (Elem e) =
         "userinput" -> codeWithLang
         "systemitem" -> codeWithLang
         "varargs" -> return $ code "(...)"
-        "keycap" -> return (str $ T.pack $ strContent e)
+        "keycap" -> return (str $ strContent e)
         "keycombo" -> keycombo <$>
                          mapM parseInline (elContent e)
         "menuchoice" -> menuchoice <$>
@@ -1137,17 +1150,17 @@ parseInline (Elem e) =
             let title = case attrValue "endterm" e of
                             ""      -> maybe "???" xrefTitleByElem
                                          (findElementById linkend content)
-                            endterm -> maybe "???" (T.pack . strContent)
+                            endterm -> maybe "???" strContent
                                          (findElementById endterm content)
             return $ link ("#" <> linkend) "" (text title)
-        "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
-                          $ str $ T.pack $ strContent e
-        "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+        "email" -> return $ link ("mailto:" <> strContent e) ""
+                          $ str $ strContent e
+        "uri" -> return $ link (strContent e) "" $ str $ strContent e
         "ulink" -> innerInlines (link (attrValue "url" e) "")
         "link" -> do
              ils <- innerInlines id
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "linkend" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1163,12 +1176,15 @@ parseInline (Elem e) =
         "title" -> return mempty
         "affiliation" -> skip
         -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidor-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          innerInlines f = extractSpaces f . mconcat <$>
@@ -1177,7 +1193,7 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
          simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
                          (filterChildren (named "member") e)
          segmentedList = do
@@ -1218,10 +1234,10 @@ parseInline (Elem e) =
                   "sect5"        -> descendantContent "title" el
                   "cmdsynopsis"  -> descendantContent "command" el
                   "funcsynopsis" -> descendantContent "function" el
-                  _              -> T.pack $ qName (elName el) ++ "_title"
+                  _              -> qName (elName el) <> "_title"
           where
             xrefLabel = attrValue "xreflabel" el
-            descendantContent name = maybe "???" (T.pack . strContent)
+            descendantContent name = maybe "???" strContent
                                    . filterElementName (\n -> qName n == name)
 
 -- | Extract a math equation from an element
@@ -1241,8 +1257,9 @@ equation e constructor =
   where
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
-      (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
-      (readMathML . T.pack . showElement)
+      (\x -> qName (elName x) == "math" &&
+             qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML")
+      (readMathML . showElement)
 
     latexEquations :: [Text]
     latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1256,8 +1273,8 @@ equation e constructor =
 -- | Get the actual text stored in a CData block. 'showContent'
 -- returns the text still surrounded by the [[CDATA]] tags.
 showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
 
 
 -- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00de6a0cd..c06adf7e3 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
 import Codec.Archive.Zip
 import Control.Monad.Reader
 import Control.Monad.State.Strict
+import Data.Bifunctor (bimap, first)
 import qualified Data.ByteString.Lazy as B
 import Data.Default (Default)
-import Data.List (delete, intersect)
+import Data.List (delete, intersect, foldl')
 import Data.Char (isSpace)
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Data.Maybe (isJust, fromMaybe)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
 import Data.Sequence (ViewL (..), viewl)
 import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
@@ -85,6 +86,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
+import Data.List.NonEmpty (nonEmpty)
 
 readDocx :: PandocMonad m
          => ReaderOptions
@@ -112,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
                      -- restarting
                      , docxListState :: M.Map (T.Text, T.Text) Integer
                      , docxPrevPara  :: Inlines
+                     , docxTableCaptions :: [Blocks]
                      }
 
 instance Default DState where
@@ -122,6 +125,7 @@ instance Default DState where
                , docxDropCap   = mempty
                , docxListState = M.empty
                , docxPrevPara  = mempty
+               , docxTableCaptions = []
                }
 
 data DEnv = DEnv { docxOptions       :: ReaderOptions
@@ -490,15 +494,32 @@ singleParaToPlain blks
       singleton $ Plain ils
 singleParaToPlain blks = blks
 
-cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
-cellToBlocks (Docx.Cell bps) = do
+cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell
+cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do
   blks <- smushBlocks <$> mapM bodyPartToBlocks bps
-  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
+
+rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row]
+rowsToRows rows = do
+  let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows)
+  cells <- traverse (traverse (uncurry cellToCell)) rowspans
+  return (fmap (Pandoc.Row nullAttr) cells)
+
+splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row])
+splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
+  $ if hasFirstRowFormatting
+    then foldl' f ((take 1 rs, []), True) (drop 1 rs)
+    else foldl' f (([], []), False) rs
+  where
+    f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs)
+      | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
+        = ((r : headerRows, bodyRows), True)
+      | otherwise
+        = ((headerRows, r : bodyRows), False)
+
+    isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue
 
-rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
-rowToBlocksList (Docx.Row cells) = do
-  blksList <- mapM cellToBlocks cells
-  return $ map singleParaToPlain blksList
 
 -- like trimInlines, but also take out linebreaks
 trimSps :: Inlines -> Inlines
@@ -545,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
   where go c | isSpace c = '-'
              | otherwise = c
 
+bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks)
+bodyPartToTableCaption (TblCaption pPr parparts) =
+  Just <$> bodyPartToBlocks (Paragraph pPr parparts)
+bodyPartToTableCaption _ = pure Nothing
+
 bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
 bodyPartToBlocks (Paragraph pPr parparts)
   | Just True <- pBidi pPr = do
@@ -636,54 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
   let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
   in
     bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (TblCaption _ _) =
+  return $ para mempty -- collected separately
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
-  let cap' = simpleCaption $ plain $ text cap
-      (hdr, rows) = case firstRowFormatting look of
-        True | null rs -> (Nothing, [r])
-             | otherwise -> (Just r, rs)
-        False -> (Nothing, r:rs)
-
-  cells <- mapM rowToBlocksList rows
+bodyPartToBlocks (Tbl cap grid look parts) = do
+  captions <- gets docxTableCaptions
+  fullCaption <- case captions of
+    c : cs -> do
+      modify (\s -> s { docxTableCaptions = cs })
+      return c
+    [] -> return $ if T.null cap then mempty else plain (text cap)
+  let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
+      cap' = caption shortCaption fullCaption
+      (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
-      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
-      -- our own, see
-      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
-      nonEmpty [] = Nothing
-      nonEmpty l  = Just l
       rowLength :: Docx.Row -> Int
-      rowLength (Docx.Row c) = length c
+      rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c)
 
-  let toRow = Pandoc.Row nullAttr . map simpleCell
-      toHeaderRow l = [toRow l | not (null l)]
+  headerCells <- rowsToRows hdr
+  bodyCells <- rowsToRows rows
 
-  -- pad cells.  New Text.Pandoc.Builder will do that for us,
-  -- so this is for compatibility while we switch over.
-  let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
-
-  hdrCells <- case hdr of
-    Just r' -> toHeaderRow <$> rowToBlocksList r'
-    Nothing -> return []
-
-      -- The two following variables (horizontal column alignment and
-      -- relative column widths) go to the default at the
-      -- moment. Width information is in the TblGrid field of the Tbl,
-      -- so should be possible. Alignment might be more difficult,
-      -- since there doesn't seem to be a column entity in docx.
+      -- Horizontal column alignment goes to the default at the moment. Getting
+      -- it might be difficult, since there doesn't seem to be a column entity
+      -- in docx.
   let alignments = replicate width AlignDefault
-      widths = replicate width ColWidthDefault
+      totalWidth = sum grid
+      widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
-                 (TableHead nullAttr hdrCells)
-                 [TableBody nullAttr 0 [] cells']
+                 (TableHead nullAttr headerCells)
+                 [TableBody nullAttr 0 [] bodyCells]
                  (TableFoot nullAttr [])
 bodyPartToBlocks (OMathPara e) =
   return $ para $ displayMath (writeTeX e)
 
-
 -- replace targets with generated anchors.
 rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
 rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
@@ -719,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
 bodyToOutput (Body bps) = do
   let (metabps, blkbps) = sepBodyParts bps
   meta <- bodyPartsToMeta metabps
+  captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+  modify (\s -> s { docxTableCaptions = captions })
   blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
   blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
   blks'' <- removeOrphanAnchors blks'
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..6e4faa639 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.Combine
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2020 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
@@ -61,7 +61,7 @@ import Data.List
 import Data.Bifunctor
 import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
                      , (><), (|>) )
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 
 data Modifier a = Modifier (a -> a)
                 | AttrModifier (Attr -> a -> a) Attr
@@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of
 
 inlinesL :: Inlines -> (Inlines, Inlines)
 inlinesL ils = case viewl $ unMany ils of
-  (s :< sq) -> (singleton s, Many sq)
+  (s :< sq) -> (B.singleton s, Many sq)
   _         -> (mempty, ils)
 
 inlinesR :: Inlines -> (Inlines, Inlines)
 inlinesR ils = case viewr $ unMany ils of
-  (sq :> s) -> (Many sq, singleton s)
+  (sq :> s) -> (Many sq, B.singleton s)
   _         -> (ils, mempty)
 
 combineInlines :: Inlines -> Inlines -> Inlines
@@ -182,7 +182,7 @@ isAttrModifier _                  = False
 
 smushInlines :: [Inlines] -> Inlines
 smushInlines xs = combineInlines xs' mempty
-  where xs' = foldl combineInlines mempty xs
+  where xs' = foldl' combineInlines mempty xs
 
 smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..dbb16a821 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , ParStyle
                                       , CharStyle(cStyleData)
                                       , Row(..)
+                                      , TblHeader(..)
                                       , Cell(..)
+                                      , VMerge(..)
                                       , TrackedChange(..)
                                       , ChangeType(..)
                                       , ChangeInfo(..)
@@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , pHeading
                                       , constructBogusParStyleData
                                       , leftBiasedMergeRunStyle
+                                      , rowsToRowspans
                                       ) where
 import Text.Pandoc.Readers.Docx.Parse.Styles
 import Codec.Archive.Zip
@@ -63,6 +66,7 @@ import Data.Char (chr, ord, readLitChar)
 import Data.List
 import qualified Data.Map as M
 import qualified Data.Text as T
+import Data.Text (Text)
 import Data.Maybe
 import System.FilePath
 import Text.Pandoc.Readers.Docx.Util
@@ -72,8 +76,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.TeXMath (Exp)
 import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XML.Light
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -127,37 +130,23 @@ mapD f xs =
   in
    concatMapM handler xs
 
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
   | isElem ns "w" "sdt" element
   , Just sdtContent <- findChildByName ns "w" "sdtContent" element
-  = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+  = concatMap (unwrapElement ns) (elChildren sdtContent)
   | isElem ns "w" "smartTag" element
-  = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+  = concatMap (unwrapElement ns) (elChildren element)
+  | otherwise
+  = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
 
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
-  Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
 
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
-  let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
-  in
-    case XMLC.nextDF modifiedCur of
-      Just cur' -> walkDocument' ns cur'
-      Nothing   -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
 walkDocument ns element =
-  let cur = XMLC.fromContent (Elem element)
-      cur' = walkDocument' ns cur
-  in
-    case XMLC.toTree cur' of
-      Elem element' -> Just element'
-      _             -> Nothing
-
+  element{ elContent = concatMap (unwrapContent ns) (elContent element) }
 
 newtype Docx = Docx Document
           deriving Show
@@ -239,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
 data BodyPart = Paragraph ParagraphStyle [ParPart]
               | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
               | Tbl T.Text TblGrid TblLook [Row]
+              | TblCaption ParagraphStyle [ParPart]
               | OMathPara [Exp]
               deriving Show
 
@@ -250,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool}
 defaultTblLook :: TblLook
 defaultTblLook = TblLook{firstRowFormatting = False}
 
-newtype Row = Row [Cell]
-           deriving Show
+data Row = Row TblHeader [Cell] deriving Show
 
-newtype Cell = Cell [BodyPart]
+data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq)
+
+data Cell = Cell GridSpan VMerge [BodyPart]
             deriving Show
 
+type GridSpan = Integer
+
+data VMerge = Continue
+            -- ^ This cell should be merged with the one above it
+            | Restart
+            -- ^ This cell should not be merged with the one above it
+            deriving (Show, Eq)
+
+rowsToRowspans :: [Row] -> [[(Int, Cell)]]
+rowsToRowspans rows = let
+  removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart))
+  in removeMergedCells (foldr f [] rows)
+  where
+    f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]]
+    f (Row _ cells) acc = let
+      spans = g cells Nothing (listToMaybe acc)
+      in spans : acc
+
+    g ::
+      -- | The current row
+      [Cell] ->
+      -- | Number of columns left below
+      Maybe Integer ->
+      -- | (rowspan so far, cell) for the row below this one
+      Maybe [(Int, Cell)] ->
+      -- | (rowspan so far, cell) for this row
+      [(Int, Cell)]
+    g cells _ Nothing = zip (repeat 1) cells
+    g cells columnsLeftBelow (Just rowBelow) =
+        case cells of
+          [] -> []
+          thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of
+            [] -> zip (repeat 1) cells
+            (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ ->
+              let spanSoFar = case vmerge of
+                    Restart -> 1
+                    Continue -> 1 + spanSoFarBelow
+                  columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow)
+                  (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow
+              in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow)
+
+    dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)])
+    dropColumns n [] = (n, [])
+    dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) =
+      if n < gridSpan
+      then (gridSpan - n, cells)
+      else dropColumns (n - gridSpan) otherCells
+
 leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
 leftBiasedMergeRunStyle a b = RunStyle
     { isBold = isBold a <|> isBold b
@@ -343,10 +382,16 @@ archiveToDocxWithWarnings archive = do
     Right doc -> Right (Docx doc, stateWarnings st)
     Left e    -> Left e
 
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
   entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
   let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
   rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                        Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -354,18 +399,18 @@ getDocumentXmlPath zf = do
   fp <- findAttr (QName "Target" Nothing Nothing) rel
   -- sometimes there will be a leading slash, which windows seems to
   -- have trouble with.
-  return $ case fp of
+  return $ case T.unpack fp of
     '/' : fp' -> fp'
-    _         -> fp
+    fp'       -> fp'
 
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   docPath <- asks envDocXmlPath
   entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
-  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+  let bodyElem' = walkDocument namespaces bodyElem
   body <- elemToBody namespaces bodyElem'
   return $ Document namespaces body
 
@@ -401,23 +446,24 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
   let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      fn_namespaces = maybe [] elemToNameSpaces fnElem
-      en_namespaces = maybe [] elemToNameSpaces enElem
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
-      en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+               >>= parseXMLFromEntry
+      fn_namespaces = maybe mempty elemToNameSpaces fnElem
+      en_namespaces = maybe mempty elemToNameSpaces enElem
+      ns = M.union fn_namespaces en_namespaces
+      fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+      en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
    Notes ns fn en
 
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
-      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+               >>= parseXMLFromEntry
+      cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
+      cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+               cmtsElem
   in
     case cmts of
       Just c  -> Comments cmts_namespaces c
@@ -433,20 +479,26 @@ filePathToRelType path docXmlPath =
   then Just InDocument
   else Nothing
 
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
+relElemToRelationship :: FilePath -> DocumentLocation -> Element
+                      -> Maybe Relationship
+relElemToRelationship fp relType element | qName (elName element) == "Relationship" =
   do
-    relId <- findAttrText (QName "Id" Nothing Nothing) element
-    target <- findAttrText (QName "Target" Nothing Nothing) element
-    return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    -- target may be relative (media/image1.jpeg) or absolute
+    -- (/word/media/image1.jpeg); we need to relativize it (see #7374)
+    let frontOfFp = T.pack $ takeWhile (/= '_') fp
+    let target' = fromMaybe target $
+           T.stripPrefix frontOfFp $ T.dropWhile (== '/') target
+    return $ Relationship relType relId target'
+relElemToRelationship _ _ _ = Nothing
 
 filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
-  mapMaybe (relElemToRelationship relType) $ elChildren relElems
+  , Just relElems <- parseXMLFromEntry entry =
+  mapMaybe (relElemToRelationship fp relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
 archiveToRelationships :: Archive -> FilePath -> [Relationship]
@@ -478,10 +530,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
 loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
 loElemToLevelOverride ns element
   | isElem ns "w" "lvlOverride" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       let startOverride = findChildByName ns "w" "startOverride" element
                           >>= findAttrByName ns "w" "val"
-                          >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                          >>= stringToInteger
           lvl = findChildByName ns "w" "lvl" element
                 >>= levelElemToLevel ns
       return $ LevelOverride ilvl startOverride lvl
@@ -490,9 +542,9 @@ loElemToLevelOverride _ _ = Nothing
 numElemToNum :: NameSpaces -> Element -> Maybe Numb
 numElemToNum ns element
   | isElem ns "w" "num" element = do
-      numId <- findAttrTextByName ns "w" "numId" element
+      numId <- findAttrByName ns "w" "numId" element
       absNumId <- findChildByName ns "w" "abstractNumId" element
-                  >>= findAttrTextByName ns "w" "val"
+                  >>= findAttrByName ns "w" "val"
       let lvlOverrides = mapMaybe
                          (loElemToLevelOverride ns)
                          (findChildrenByName ns "w" "lvlOverride" element)
@@ -502,7 +554,7 @@ numElemToNum _ _ = Nothing
 absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
 absNumElemToAbsNum ns element
   | isElem ns "w" "abstractNum" element = do
-      absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+      absNumId <- findAttrByName ns "w" "abstractNumId" element
       let levelElems = findChildrenByName ns "w" "lvl" element
           levels = mapMaybe (levelElemToLevel ns) levelElems
       return $ AbstractNumb absNumId levels
@@ -511,23 +563,23 @@ absNumElemToAbsNum _ _ = Nothing
 levelElemToLevel :: NameSpaces -> Element -> Maybe Level
 levelElemToLevel ns element
   | isElem ns "w" "lvl" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       fmt <- findChildByName ns "w" "numFmt" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       txt <- findChildByName ns "w" "lvlText" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       let start = findChildByName ns "w" "start" element
                   >>= findAttrByName ns "w" "val"
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                  >>= stringToInteger
       return (Level ilvl fmt txt start)
 levelElemToLevel _ _ = Nothing
 
 archiveToNumbering' :: Archive -> Maybe Numbering
 archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
-    Nothing -> Just $ Numbering [] [] []
+    Nothing -> Just $ Numbering mempty [] []
     Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
           numElems = findChildrenByName namespaces "w" "num" numberingElem
           absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
@@ -537,13 +589,13 @@ archiveToNumbering' zf =
 
 archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
-  fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+  fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
   | isElem ns "w" (notetype <> "s") element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" notetype element)
       in
@@ -555,7 +607,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
 elemToComments ns element
   | isElem ns "w" "comments" element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" "comment" element)
       in
@@ -570,7 +622,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
 elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
   let cols = findChildrenByName ns "w" "gridCol" element
   in
-   mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+   mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
    cols
 elemToTblGrid _ _ = throwError WrongElem
 
@@ -594,14 +646,31 @@ elemToRow ns element | isElem ns "w" "tr" element =
   do
     let cellElems = findChildrenByName ns "w" "tc" element
     cells <- mapD (elemToCell ns) cellElems
-    return $ Row cells
+    let hasTblHeader = maybe NoTblHeader (const HasTblHeader)
+          (findChildByName ns "w" "trPr" element
+           >>= findChildByName ns "w" "tblHeader")
+    return $ Row hasTblHeader cells
 elemToRow _ _ = throwError WrongElem
 
 elemToCell :: NameSpaces -> Element -> D Cell
 elemToCell ns element | isElem ns "w" "tc" element =
   do
+    let properties = findChildByName ns "w" "tcPr" element
+    let gridSpan = properties
+                     >>= findChildByName ns "w" "gridSpan"
+                     >>= findAttrByName ns "w" "val"
+                     >>= stringToInteger
+    let vMerge = case properties >>= findChildByName ns "w" "vMerge" of
+                   Nothing -> Restart
+                   Just e ->
+                     fromMaybe Continue $ do
+                       s <- findAttrByName ns "w" "val" e
+                       case s of
+                         "continue" -> Just Continue
+                         "restart" -> Just Restart
+                         _ -> Nothing
     cellContents <- mapD (elemToBodyPart ns) (elChildren element)
-    return $ Cell cellContents
+    return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
 elemToCell _ _ = throwError WrongElem
 
 elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
@@ -615,12 +684,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
       stringToInteger
     , hangingParIndent =
       findAttrByName ns "w" "hanging" element >>=
-      stringToInteger}
+      stringToInteger }
 elemToParIndentation _ _ = Nothing
 
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+  case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> (n' .|. n) /= 0
 
@@ -633,10 +702,9 @@ pNumInfo = getParStyleField numInfo . pStyle
 elemToBodyPart :: NameSpaces -> Element -> D BodyPart
 elemToBodyPart ns element
   | isElem ns "w" "p" element
-  , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
-      do
-        expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
-        return $ OMathPara expsLst
+  , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do
+      expsLst <- eitherToD $ readOMML $ showElement c
+      return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
   , Just (numId, lvl) <- getNumInfo ns element = do
@@ -654,13 +722,31 @@ elemToBodyPart ns element
         Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
                     levelInfo <- lookupLevel numId lvl <$> asks envNumbering
                     return $ ListItem parstyle numId lvl levelInfo parparts
-        _ -> return $ Paragraph parstyle parparts
+        _ -> let
+          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+          hasSimpleTableField = fromMaybe False $ do
+            fldSimple <- findChildByName ns "w" "fldSimple" element
+            instr <- findAttrByName ns "w" "instr" fldSimple
+            pure ("Table" `elem` T.words instr)
+
+          hasComplexTableField = fromMaybe False $ do
+            instrText <- findElementByName ns "w" "instrText" element
+            pure ("Table" `elem` T.words (strContent instrText))
+
+          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+             then return $ TblCaption parstyle parparts
+             else return $ Paragraph parstyle parparts
+
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
-    let caption' = findChildByName ns "w" "tblPr" element
+    let tblProperties = findChildByName ns "w" "tblPr" element
+        caption = fromMaybe "" $ tblProperties
                    >>= findChildByName ns "w" "tblCaption"
-                   >>= findAttrTextByName ns "w" "val"
-        caption = fromMaybe "" caption'
+                   >>= findAttrByName ns "w" "val"
+        description = fromMaybe "" $ tblProperties
+                       >>= findChildByName ns "w" "tblDescription"
+                       >>= findAttrByName ns "w" "val"
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
           Nothing -> return []
@@ -673,7 +759,7 @@ elemToBodyPart ns element
     grid <- grid'
     tblLook <- tblLook'
     rows <- mapD (elemToRow ns) (elChildren element)
-    return $ Tbl caption grid tblLook rows
+    return $ Tbl (caption <> description) grid tblLook rows
 elemToBodyPart _ _ = throwError WrongElem
 
 lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
@@ -698,8 +784,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
 getTitleAndAlt ns element =
   let mbDocPr = findChildByName ns "wp" "inline" element >>=
                 findChildByName ns "wp" "docPr"
-      title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
-      alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+      title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+      alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -711,22 +797,29 @@ elemToParPart ns element
   = let (title, alt) = getTitleAndAlt ns drawingElem
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrTextByName ns "r" "embed"
+                  >>= findAttrByName ns "r" "embed"
     in
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
        Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
 elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
     let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrTextByName ns "r" "id"
+                  >>= findAttrByName ns "r" "id"
     in
      case drawing of
-       -- Todo: check out title and attr for deprecated format.
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
        Nothing -> throwError WrongElem
+elemToParPart ns element
+  | isElem ns "w" "r" element
+  , Just objectElem <- findChildByName ns "w" "object" element
+  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
 -- Chart
 elemToParPart ns element
   | isElem ns "w" "r" element
@@ -790,7 +883,7 @@ elemToParPart ns element
       fldCharState <- gets stateFldCharState
       case fldCharState of
         FldCharOpen -> do
-          info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+          info <- eitherToD $ parseFieldInfo $ strContent instrText
           modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
           return NullParPart
         _ -> return NullParPart
@@ -811,48 +904,48 @@ elemToParPart ns element
       return $ ChangedRuns change runs
 elemToParPart ns element
   | isElem ns "w" "bookmarkStart" element
-  , Just bmId <- findAttrTextByName ns "w" "id" element
-  , Just bmName <- findAttrTextByName ns "w" "name" element =
+  , Just bmId <- findAttrByName ns "w" "id" element
+  , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just relId <- findAttrTextByName ns "r" "id" element = do
+  , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
     runs <- mapD (elemToRun ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
-         case findAttrTextByName ns "w" "anchor" element of
+         case findAttrByName ns "w" "anchor" element of
              Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
              Nothing -> return $ ExternalHyperLink target runs
       Nothing     -> return $ ExternalHyperLink "" runs
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+  , Just anchor <- findAttrByName ns "w" "anchor" element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ InternalHyperLink anchor runs
 elemToParPart ns element
   | isElem ns "w" "commentRangeStart" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "commentRangeEnd" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element =
+  , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
 elemToParPart ns element
   | isElem ns "m" "oMath" element =
-    fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+    fmap PlainOMath (eitherToD $ readOMML $ showElement element)
 elemToParPart _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
   | isElem ns "w" "comment" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element
-  , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
-  , cmtDate <- findAttrTextByName ns "w" "date" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element
+  , Just cmtAuthor <- findAttrByName ns "w" "author" element
+  , cmtDate <- findAttrByName ns "w" "date" element = do
       bps <- mapD (elemToBodyPart ns) (elChildren element)
       return $ CommentStart cmtId cmtAuthor cmtDate bps
 elemToCommentStart _ _ = throwError WrongElem
@@ -871,7 +964,7 @@ elemToExtent drawingElem =
     where
       wp_ns  = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
-                    >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+                    >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
 
 childElemToRun :: NameSpaces -> Element -> D Run
@@ -882,7 +975,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
@@ -895,7 +988,7 @@ childElemToRun ns element
   = return InlineChart
 childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
-  , Just fnId <- findAttrTextByName ns "w" "id" element = do
+  , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -903,7 +996,7 @@ childElemToRun ns element
       Nothing  -> return $ Footnote []
 childElemToRun ns element
   | isElem ns "w" "endnoteReference" element
-  , Just enId <- findAttrTextByName ns "w" "id" element = do
+  , Just enId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -956,15 +1049,15 @@ getParStyleField _ _ = Nothing
 getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
 getTrackedChange ns element
   | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange ns element
   | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange _ _ = Nothing
 
@@ -973,7 +1066,7 @@ elemToParagraphStyle ns element sty
   | Just pPr <- findChildByName ns "w" "pPr" element =
     let style =
           mapMaybe
-          (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+          (fmap ParaStyleId . findAttrByName ns "w" "val")
           (findChildrenByName ns "w" "pStyle" pPr)
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
@@ -1005,7 +1098,7 @@ elemToRunStyleD ns element
     charStyles <- asks envCharStyles
     let parentSty =
           findChildByName ns "w" "rStyle" rPr >>=
-          findAttrTextByName ns "w" "val" >>=
+          findAttrByName ns "w" "val" >>=
           flip M.lookup charStyles . CharStyleId
     return $ elemToRunStyle ns element parentSty
 elemToRunStyleD _ _ = return defaultRunStyle
@@ -1015,7 +1108,7 @@ elemToRunElem ns element
   | isElem ns "w" "t" element
     || isElem ns "w" "delText" element
     || isElem ns "m" "t" element = do
-    let str = T.pack $ strContent element
+    let str = strContent element
     font <- asks envFont
     case font of
       Nothing -> return $ TextRun str
@@ -1037,14 +1130,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-    case readLitChar ("\\x" ++ s) of
+    case readLitChar ("\\x" ++ T.unpack s) of
          [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
          _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
-    getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
-    lowerFromPrivate ('F':xs) = '0':xs
-    lowerFromPrivate xs       = xs
+    getFont = textToFont =<< findAttrByName ns "w" "font" element
+    lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+                       | otherwise             = t
 getSymChar _ _ = TextRun ""
 
 elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1054,8 +1147,9 @@ elemToRunElems ns element
        let qualName = elemName ns "w"
        let font = do
                     fontElem <- findElement (qualName "rFonts") element
-                    textToFont . T.pack =<<
-                       foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+                    textToFont =<<
+                       foldr ((<|>) . (flip findAttr fontElem . qualName))
+                         Nothing ["ascii", "hAnsi"]
        local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
 elemToRunElems _ _ = throwError WrongElem
 
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,11 +48,13 @@ import Data.Function (on)
 import Data.String (IsString(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
 import Data.Maybe
 import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -108,7 +110,7 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          , isRTL        :: Maybe Bool
                          , isForceCTL   :: Maybe Bool
                          , rVertAlign   :: Maybe VertAlign
-                         , rUnderline   :: Maybe String
+                         , rUnderline   :: Maybe Text
                          , rParentStyle :: Maybe CharStyle
                          }
                 deriving Show
@@ -135,19 +137,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                            , rParentStyle = Nothing
                            }
 
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)
 
 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
@@ -155,7 +160,7 @@ isBasedOnStyle ns element parentStyle
   , Just styleType <- findAttrByName ns "w" "type" element
   , styleType == cStyleType parentStyle
   , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
-                       findAttrTextByName ns "w" "val"
+                       findAttrByName ns "w" "val"
   , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
   | isElem ns "w" "style" element
   , Just styleType <- findAttrByName ns "w" "type" element
@@ -165,7 +170,7 @@ isBasedOnStyle ns element parentStyle
   | otherwise = False
 
 class HasStyleId a => ElemToStyle a where
-  cStyleType  :: Maybe a -> String
+  cStyleType  :: Maybe a -> Text
   elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
 
 class FromStyleId (StyleId a) => HasStyleId a where
@@ -222,8 +227,10 @@ buildBasedOnList ns element rootStyle =
     stys -> stys ++
             concatMap (buildBasedOnList ns element . Just) stys
 
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+                      Right (x,_) -> Just x
+                      Left _      -> Nothing
 
 checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
 checkOnOff ns rPr tag
@@ -243,7 +250,7 @@ checkOnOff _ _ _ = Nothing
 elemToCharStyle :: NameSpaces
                 -> Element -> Maybe CharStyle -> Maybe CharStyle
 elemToCharStyle ns element parentStyle
-  = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+  = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
               <*> getElementStyleName ns element
               <*> Just (elemToRunStyle ns element parentStyle)
 
@@ -277,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
 getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
 getHeaderLevel ns element
   | Just styleName <- getElementStyleName ns element
-  , Just n <- stringToInteger . T.unpack =<<
+  , Just n <- stringToInteger =<<
               (T.stripPrefix "heading " . T.toLower $
                 fromStyleName styleName)
   , n > 0 = Just (styleName, fromInteger n)
@@ -285,8 +292,8 @@ getHeaderLevel _ _ = Nothing
 
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
-  ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
-  <|> findAttrTextByName ns "w" "styleId" el)
+  ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+  <|> findAttrByName ns "w" "styleId" el)
 
 getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
 getNumInfo ns element = do
@@ -294,15 +301,15 @@ getNumInfo ns element = do
               findChildByName ns "w" "numPr"
       lvl = fromMaybe "0" (numPr >>=
                            findChildByName ns "w" "ilvl" >>=
-                           findAttrTextByName ns "w" "val")
+                           findAttrByName ns "w" "val")
   numId <- numPr >>=
            findChildByName ns "w" "numId" >>=
-           findAttrTextByName ns "w" "val"
+           findAttrByName ns "w" "val"
   return (numId, lvl)
 
 elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
 elemToParStyleData ns element parentStyle
-  | Just styleId <- findAttrTextByName ns "w" "styleId" element
+  | Just styleId <- findAttrByName ns "w" "styleId" element
   , Just styleName <- getElementStyleName ns element
   = Just $ ParStyle
       {
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..970697a2d 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
@@ -18,51 +19,52 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
-                                      , findAttrText
+                                      , findElementByName
                                       , findAttrByName
-                                      , findAttrTextByName
                                       ) where
 
-import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
+import qualified Data.Map as M
 
-type NameSpaces = [(String, String)]
+type NameSpaces = M.Map Text Text
 
 elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+elemToNameSpaces = foldr (\(Attr qn val) ->
+                             case qn of
+                               QName s _ (Just "xmlns") -> M.insert s val
+                               _ -> id) mempty . elAttribs
 
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _                                     = Nothing
-
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (M.lookup prefix ns)
+             (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
-  let ns' = ns ++ elemToNameSpaces element
+  let ns' = ns <> elemToNameSpaces element
   in qName (elName element) == name &&
-     qURI (elName element) == lookup prefix ns'
+     qURI (elName element) == M.lookup prefix ns'
 
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
+-- | Like 'findChildrenByName', but searches descendants.
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findElement (elemName ns' pref name) el
 
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 336be09e5..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
@@ -317,7 +318,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
 interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
-interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
+interwikiToUrl unknown page = unknown <> ">" <> page
 
 linkText :: PandocMonad m => DWParser m B.Inlines
 linkText = parseLink fromRaw "[[" "]]"
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,14 +17,14 @@ module Text.Pandoc.Readers.EPUB
   (readEPUB)
   where
 
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                           toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Map as M (Map, elems, fromList, lookup)
 import Data.Maybe (mapMaybe)
 import qualified Data.Text.Lazy as TL
@@ -40,12 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
 
 readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
 readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -125,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
 imageMimes :: [MimeType]
 imageMimes = ["image/gif", "image/jpeg", "image/png"]
 
-type CoverId = String
+type CoverId = Text
 
 type CoverImage = FilePath
 
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
 parseManifest content coverId = do
   manifest <- findElementE (dfName "manifest") content
   let items = findChildren (dfName "item") manifest
   r <- mapM parseItem items
   let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
-  return (cover `mplus` coverId, M.fromList r)
+  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
   where
-    findCover e = maybe False (isInfixOf "cover-image")
+    findCover e = maybe False (T.isInfixOf "cover-image")
                   (findAttr (emptyName "properties") e)
                || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
     parseItem e = do
       uid <- findAttrE (emptyName "id") e
       href <- findAttrE (emptyName "href") e
       mime <- findAttrE (emptyName "media-type") e
-      return (uid, (href, T.pack mime))
+      return (uid, (T.unpack href, mime))
 
 parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
 parseSpine is e = do
@@ -172,25 +173,25 @@ parseMeta content = do
 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
 parseMetaItem :: Element -> Meta -> Meta
 parseMetaItem e@(stripNamespace . elName -> field) meta =
-  addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
 
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
 renameMeta "creator" = "author"
-renameMeta s         = T.pack s
+renameMeta s         = s
 
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
   metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
   let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
     (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
-  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest
 
 -- Fixup
 
@@ -200,7 +201,8 @@ fixInternalReferences pathToFile =
   . walk (fixBlockIRs filename)
   . walk (fixInlineIRs filename)
   where
-    (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+    (root, T.unpack . escapeURI . T.pack -> filename) =
+      splitFileName pathToFile
 
 fixInlineIRs :: String -> Inline -> Inline
 fixInlineIRs s (Span as v) =
@@ -213,7 +215,7 @@ fixInlineIRs s (Link as is t) =
   Link (fixAttrs s as) is t
 fixInlineIRs _ v = v
 
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
 prependHash ps l@(Link attr is (url, tit))
   | or [s `T.isPrefixOf` url | s <- ps] =
     Link attr is ("#" <> url, tit)
@@ -230,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
 fixBlockIRs _ b = b
 
 fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+  (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
 
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
 addHash _ ""    = ""
 addHash s ident = T.pack (takeFileName s) <> "#" <> ident
 
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
 
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
 isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
 
 -- Library
@@ -256,39 +259,44 @@ uncurry3 f (a, b, c) = f a b c
 
 -- Utility
 
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
 stripNamespace (QName v _ _) = v
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
 attrToNSPair _                              = Nothing
 
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
 attrToPair (Attr (QName name _ _) val) = (name, val)
 
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
 defaultNameSpace = Just "http://www.idpf.org/2007/opf"
 
-dfName :: String -> QName
+dfName :: Text -> QName
 dfName s = QName s defaultNameSpace Nothing
 
-emptyName :: String -> QName
+emptyName :: Text -> QName
 emptyName s = QName s Nothing Nothing
 
 -- Convert Maybe interface to Either
 
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
 findAttrE q e = mkE "findAttr" $ findAttr q e
 
 findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
-  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+  mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
 
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+  mkE ("Unable to find element: " <> tshow e) $ findElement e x
 
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,13 +25,13 @@ TODO:
 module Text.Pandoc.Readers.FB2 ( readFB2 ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
 import Data.ByteString.Base64.Lazy
 import Data.Functor
 import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -40,8 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -62,12 +63,15 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
       let authors = if null $ fb2Authors st
                     then id
                     else setMeta "author" (map text $ reverse $ fb2Authors st)
@@ -83,12 +87,12 @@ removeHash t = case T.uncons t of
   Just ('#', xs) -> xs
   _              -> t
 
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
 
 parseInline :: PandocMonad m => Content -> FB2 m Inlines
 parseInline (Elem e) =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -96,12 +100,12 @@ parseInline (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement name
       pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
 parseInline (CRef r) = pure $ str $ convertEntity r
 
 parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +115,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
 
 parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
 parseRootElement e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "FictionBook" -> do
       -- Parse notes before parsing the rest of the content.
       case filterChild isNotesBody e of
@@ -144,7 +148,7 @@ parseNote e =
     Just sectionId -> do
       content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
       oldNotes <- gets fb2Notes
-      modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+      modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
       pure ()
   where
     isTitle x = qName (elName x) == "title"
@@ -156,7 +160,7 @@ parseNote e =
 -- | Parse a child of @\<FictionBook>@ element.
 parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
 parseFictionBookChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "stylesheet" -> pure mempty -- stylesheet is ignored
     "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
     "body" -> if isNotesBody e
@@ -168,7 +172,7 @@ parseFictionBookChild e =
 -- | Parse a child of @\<description>@ element.
 parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
 parseDescriptionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
     "src-title-info" -> pure () -- ignore
     "document-info" -> pure ()
@@ -182,7 +186,7 @@ parseDescriptionChild e =
 -- | Parse a child of @\<body>@ element.
 parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
 parseBodyChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "image" -> parseImageElement e
     "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
     "epigraph" -> parseEpigraph e
@@ -196,7 +200,10 @@ parseBinaryElement e =
     (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
     (Just _, Nothing) ->
       report $ IgnoredElement "binary without content-type attribute"
-    (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+    (Just filename, contentType) ->
+      insertMedia (T.unpack filename) contentType
+                    (decodeLenient
+                      (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
 
 -- * Type parsers
 
@@ -206,13 +213,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
 
 parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
 parseAuthorChild e =
-  case T.pack $ qName $ elName e of
-    "first-name" -> pure $ Just $ T.pack $ strContent e
-    "middle-name" -> pure $ Just $ T.pack $ strContent e
-    "last-name" -> pure $ Just $ T.pack $ strContent e
-    "nickname" -> pure $ Just $ T.pack $ strContent e
-    "home-page" -> pure $ Just $ T.pack $ strContent e
-    "email" -> pure $ Just $ T.pack $ strContent e
+  case qName $ elName e of
+    "first-name" -> pure $ Just $ strContent e
+    "middle-name" -> pure $ Just $ strContent e
+    "last-name" -> pure $ Just $ strContent e
+    "nickname" -> pure $ Just $ strContent e
+    "home-page" -> pure $ Just $ strContent e
+    "email" -> pure $ Just $ strContent e
     name -> do
       report $ IgnoredElement $ name <> " in author"
       pure Nothing
@@ -236,13 +243,13 @@ parseTitleContent _ = pure Nothing
 parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
 parseImageElement e =
   case href of
-    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
     Nothing -> do
       report $ IgnoredElement " image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        title = maybe "" T.pack $ findAttr (unqual "title") e
-        imgId = maybe "" T.pack $ findAttr (unqual "id") e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        title = fromMaybe "" $ findAttr (unqual "title") e
+        imgId = fromMaybe "" $ findAttr (unqual "id") e
         href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
 
 -- | Parse @pType@
@@ -256,7 +263,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
 -- | Parse @citeType@ child
 parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
 parseCiteChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "empty-line" -> pure horizontalRule
@@ -271,13 +278,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
 
 parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
 parsePoemChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "epigraph" -> parseEpigraph e
     "stanza" -> parseStanza e
     "text-author" -> para <$> parsePType e
-    "date" -> pure $ para $ text $ T.pack $ strContent e
+    "date" -> pure $ para $ text $ strContent e
     name -> report (UnexpectedXmlElement name "poem") $> mempty
 
 parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +297,7 @@ joinLineBlocks [] = []
 
 parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
 parseStanzaChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +307,11 @@ parseStanzaChild e =
 parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraph e =
   divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
-  where divId = maybe "" T.pack $ findAttr (unqual "id") e
+  where divId = fromMaybe "" $ findAttr (unqual "id") e
 
 parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraphChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -318,7 +325,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
 
 parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
 parseAnnotationChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -332,14 +339,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
 parseSection e = do
   n <- gets fb2SectionLevel
   modify $ \st -> st{ fb2SectionLevel = n + 1 }
-  let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+  let sectionId = fromMaybe "" $ findAttr (unqual "id") e
   bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
   modify $ \st -> st{ fb2SectionLevel = n }
   pure bs
 
 parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
 parseSectionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseBodyChild e
     "epigraph" -> parseEpigraph e
     "image" -> parseImageElement e
@@ -361,16 +368,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
 parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
 parseNamedStyle e = do
   content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
-  let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+  let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
   case findAttr (unqual "name") e of
-    Just name -> pure $ spanWith ("", [T.pack name], lang) content
+    Just name -> pure $ spanWith ("", [name], lang) content
     Nothing -> do
       report $ IgnoredElement "link without required name"
       pure mempty
 
 parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
 parseNamedStyleChild (Elem e) =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -378,7 +385,7 @@ parseNamedStyleChild (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement $ name <> " in style"
@@ -390,7 +397,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
 parseLinkType e = do
   content <- mconcat <$> mapM parseStyleLinkType (elContent e)
   notes <- gets fb2Notes
-  case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+  case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
     Just href -> case findAttr (QName "type" Nothing Nothing) e of
                    Just "note" -> case M.lookup href notes of
                                     Nothing -> pure $ link href "" content
@@ -417,15 +424,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
 -- | Parse @title-infoType@
 parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
 parseTitleInfoChild e =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "genre" -> pure ()
     "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
-    "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+    "book-title" -> modify (setMeta "title" (text $ strContent e))
     "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
     "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
-                                                                      $ T.pack
                                                                       $ strContent e))
-    "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+    "date" -> modify (setMeta "date" (text $ strContent e))
     "coverpage" -> parseCoverPage e
     "lang" -> pure ()
     "src-lang" -> pure ()
@@ -439,7 +445,7 @@ parseCoverPage e =
     Just img -> case href of
                   Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
                   Nothing -> pure ()
-                where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+                where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
     Nothing -> pure ()
 
 -- | Parse @inlineImageType@ element
@@ -452,5 +458,5 @@ parseInlineImageElement e =
     Nothing -> do
       report $ IgnoredElement "inline image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -19,21 +19,20 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 , htmlInBalanced
                                 , isInlineTag
                                 , isBlockTag
-                                , NamedTag(..)
                                 , isTextTag
                                 , isCommentTag
                                 ) where
 
 import Control.Applicative ((<|>))
-import Control.Arrow (first)
 import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAlphaNum, isLetter)
 import Data.Default (Default (..), def)
 import Data.Foldable (for_)
 import Data.List.Split (splitWhen)
+import Data.List (foldl')
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Monoid (First (..))
@@ -62,21 +61,22 @@ import Text.Pandoc.Options (
     ReaderOptions (readerExtensions, readerStripComments),
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
+    htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
-  let tags = stripPrefixes . canonicalizeTags $
+  let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -95,6 +95,15 @@ readHtml opts inp = do
     Right doc -> return doc
     Left  err -> throwError $ PandocParseError $ T.pack $ getError err
 
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s)   = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
 replaceNotes bs = do
   st <- getState
@@ -112,14 +121,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
 setInPlain = local (\s -> s {inPlain = True})
 
 pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
   (TagOpen "html" attr) <- lookAhead pAny
-  for_ (lookup "lang" attr) $
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
 pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+  (TagOpen "body" attr) <- lookAhead pAny
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+    updateState . B.setMeta "lang" . B.text
+  pInTags "body" block
 
 pHead :: PandocMonad m => TagParser m Blocks
 pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
@@ -145,32 +158,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
           return mempty
 
 block :: PandocMonad m => TagParser m Blocks
-block = do
-  res <- choice
-            [ eSection
-            , eSwitch B.para block
-            , mempty <$ eFootnote
-            , mempty <$ eTOC
-            , mempty <$ eTitlePage
-            , pPara
-            , pHeader
-            , pBlockQuote
-            , pCodeBlock
-            , pList
-            , pHrule
-            , pTable block
-            , pHtml
-            , pHead
-            , pBody
-            , pLineBlock
-            , pDiv
-            , pPlain
-            , pFigure
-            , pIframe
-            , pRawHtmlBlock
-            ]
-  trace (T.take 60 $ tshow $ B.toList res)
-  return res
+block = ((do
+  tag <- lookAhead (pSatisfy isBlockTag)
+  exts <- getOption readerExtensions
+  case tag of
+    TagOpen name attr ->
+      let type' = fromMaybe "" $
+                     lookup "type" attr <|> lookup "epub:type" attr
+          epubExts = extensionEnabled Ext_epub_html_exts exts
+      in
+      case name of
+        _ | name `elem` sectioningContent
+          , epubExts
+          , "chapter" `T.isInfixOf` type'
+          -> eSection
+        _ | epubExts
+          , type' `elem` ["footnote", "rearnote"]
+          -> mempty <$ eFootnote
+        _ | epubExts
+          , type' == "toc"
+          -> mempty <$ eTOC
+        _ | "titlepage" `T.isInfixOf` type'
+          , name `elem` ("section" : groupingContent)
+          -> mempty <$ eTitlePage
+        "p" -> pPara
+        "h1" -> pHeader
+        "h2" -> pHeader
+        "h3" -> pHeader
+        "h4" -> pHeader
+        "h5" -> pHeader
+        "h6" -> pHeader
+        "blockquote" -> pBlockQuote
+        "pre" -> pCodeBlock
+        "ul" -> pBulletList
+        "ol" -> pOrderedList
+        "dl" -> pDefinitionList
+        "table" -> pTable block
+        "hr" -> pHrule
+        "html" -> pHtml
+        "head" -> pHead
+        "body" -> pBody
+        "div"
+          | extensionEnabled Ext_line_blocks exts
+          , Just "line-block" <- lookup "class" attr
+          -> pLineBlock
+          | otherwise
+          -> pDiv
+        "section" -> pDiv
+        "header" -> pDiv
+        "main" -> pDiv
+        "figure" -> pFigure
+        "iframe" -> pIframe
+        "style" -> pRawHtmlBlock
+        "textarea" -> pRawHtmlBlock
+        "switch"
+          | epubExts
+          -> eSwitch B.para block
+        _ -> mzero
+    _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+        res <$ trace (T.take 60 $ tshow $ B.toList res)
 
 namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
 namespaces = [(mathMLNamespace, pMath True)]
@@ -243,9 +289,6 @@ eTOC = try $ do
   guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
   void (pInTags tag block)
 
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
 pBulletList :: PandocMonad m => TagParser m Blocks
 pBulletList = try $ do
   pSatisfy (matchTagOpen "ul" [])
@@ -319,7 +362,10 @@ pDefListItem = try $ do
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+  let term = foldl' (\x y -> if null x
+                                then trimInlines y
+                                else x <> B.linebreak <> trimInlines y)
+                    mempty terms
   return (term, map (fixPlains True) defs)
 
 fixPlains :: Bool -> Blocks -> Blocks
@@ -356,13 +402,16 @@ pLineBlock = try $ do
             B.toList ils
   return $ B.lineBlock lns
 
+isDivLike :: Text -> Bool
+isDivLike "div"     = True
+isDivLike "section" = True
+isDivLike "header"  = True
+isDivLike "main"    = True
+isDivLike _         = False
+
 pDiv :: PandocMonad m => TagParser m Blocks
 pDiv = try $ do
   guardEnabled Ext_native_divs
-  let isDivLike "div"     = True
-      isDivLike "section" = True
-      isDivLike "main"    = True
-      isDivLike _         = False
   TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
   let (ident, classes, kvs) = toAttr attr'
   contents <- pInTags tag block
@@ -380,11 +429,17 @@ pIframe = try $ do
   tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
   pCloses "iframe" <|> eof
   url <- canonicalizeUrl $ fromAttrib "src" tag
-  (bs, _) <- openURL url
-  let inp = UTF8.toText bs
-  opts <- readerOpts <$> getState
-  Pandoc _ contents <- readHtml opts inp
-  return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+  if T.null url
+     then ignore $ renderTags' [tag, TagClose "iframe"]
+     else catchError
+       (do (bs, _) <- openURL url
+           let inp = UTF8.toText bs
+           opts <- readerOpts <$> getState
+           Pandoc _ contents <- readHtml opts inp
+           return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+       (\e -> do
+         logMessage $ CouldNotFetchResource url (renderError e)
+         ignore $ renderTags' [tag, TagClose "iframe"])
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
@@ -446,17 +501,13 @@ pHeader = try $ do
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let attr = toStringAttr attr'
-  let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
-                                               [("class","title")]
   level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] T.words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   attr'' <- registerHeader (ident, classes, keyvals) contents
-  return $ if bodyTitle
-              then mempty  -- skip a representation of the title in the body
-              else B.headerWith attr'' level contents
+  return $ B.headerWith attr'' level contents
 
 pHrule :: PandocMonad m => TagParser m Blocks
 pHrule = do
@@ -506,7 +557,18 @@ pFigure = try $ do
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
-  let attr = toAttr attr'
+  -- if the `pre` has no attributes, try if it is followed by a `code`
+  -- element and use those attributes if possible.
+  attr <- case attr' of
+    _:_ -> pure (toAttr attr')
+    []  -> option nullAttr $ do
+      TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+      pure $ toAttr
+        [ (k, v') | (k, v) <- codeAttr
+                    -- strip language from class
+                  , let v' = if k == "class"
+                             then fromMaybe v (T.stripPrefix "language-" v)
+                             else v ]
   contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
@@ -525,31 +587,47 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = choice
-           [ eNoteref
-           , eSwitch id inline
-           , pTagText
-           , pQ
-           , pEmph
-           , pStrong
-           , pSuperscript
-           , pSubscript
-           , pSpanLike
-           , pSmall
-           , pStrikeout
-           , pUnderline
-           , pLineBreak
-           , pLink
-           , pImage
-           , pSvg
-           , pBdo
-           , pCode
-           , pCodeWithClass [("samp","sample"),("var","variable")]
-           , pSpan
-           , pMath False
-           , pScriptMath
-           , pRawHtmlInline
-           ]
+inline = pTagText <|> do
+  tag <- lookAhead (pSatisfy isInlineTag)
+  exts <- getOption readerExtensions
+  case tag of
+    TagOpen name attr ->
+      case name of
+        "a" | extensionEnabled Ext_epub_html_exts exts
+          , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+          , Just ('#',_) <- lookup "href" attr >>= T.uncons
+            -> eNoteref
+            | otherwise -> pLink
+        "switch" -> eSwitch id inline
+        "q" -> pQ
+        "em" -> pEmph
+        "i"  -> pEmph
+        "strong" -> pStrong
+        "b" -> pStrong
+        "sup" -> pSuperscript
+        "sub" -> pSubscript
+        "small" -> pSmall
+        "s" -> pStrikeout
+        "strike" -> pStrikeout
+        "del" -> pStrikeout
+        "u" -> pUnderline
+        "ins" -> pUnderline
+        "br" -> pLineBreak
+        "img" -> pImage
+        "svg" -> pSvg
+        "bdo" -> pBdo
+        "code" -> pCode
+        "samp" -> pCodeWithClass "samp" "sample"
+        "var" -> pCodeWithClass "var" "variable"
+        "span" -> pSpan
+        "math" -> pMath False
+        "script"
+          | Just x <- lookup "type" attr
+          , "math/tex" `T.isPrefixOf` x -> pScriptMath
+        _ | name `elem` htmlSpanLikeElements -> pSpanLike
+        _ -> pRawHtmlInline
+    TagText _ -> pTagText
+    _ -> pRawHtmlInline
 
 pSelfClosing :: PandocMonad m
              => (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -560,27 +638,25 @@ pSelfClosing f g = do
   return open
 
 pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
-  where citedQuote = do
-          tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
-          url <- canonicalizeUrl $ fromAttrib "cite" tag
-          let uid = fromMaybe (fromAttrib "name" tag) $
-                       maybeFromAttrib "id" tag
-          let cls = T.words $ fromAttrib "class" tag
-
-          makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
-        normalQuote = do
-          pSatisfy $ tagOpenLit "q" (const True)
-          makeQuote id
-        makeQuote wrapper = do
-          ctx <- asks quoteContext
-          let (constructor, innerContext) = case ctx of
-                        InDoubleQuote -> (B.singleQuoted, InSingleQuote)
-                        _             -> (B.doubleQuoted, InDoubleQuote)
-
-          content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
-          return $ extractSpaces (constructor . wrapper) content
+pQ = do
+  TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+  case lookup "cite" attrs of
+    Just url -> do
+      let uid = fromMaybe mempty $
+                   lookup "name" attrs <> lookup "id" attrs
+      let cls = maybe [] T.words $ lookup "class" attrs
+      url' <- canonicalizeUrl url
+      makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+    Nothing -> makeQuote id
+ where
+  makeQuote wrapper = do
+    ctx <- asks quoteContext
+    let (constructor, innerContext) = case ctx of
+                  InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+                  _             -> (B.doubleQuoted, InDoubleQuote)
+    content <- withQuoteContext innerContext
+                  (mconcat <$> manyTill inline (pCloses "q"))
+    return $ extractSpaces (constructor . wrapper) content
 
 pEmph :: PandocMonad m => TagParser m Inlines
 pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -646,17 +722,12 @@ pLink = try $ do
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
-  tag <- pSelfClosing (=="img") (isJust . lookup "src")
+  tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
   url <- canonicalizeUrl $ fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  let uid = fromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
-  let getAtt k = case fromAttrib k tag of
-                   "" -> []
-                   v  -> [(k, v)]
-  let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
-  return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+  let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+  return $ B.imageWith attr (escapeURI url) title (B.text alt)
 
 pSvg :: PandocMonad m => TagParser m Inlines
 pSvg = do
@@ -671,13 +742,12 @@ pSvg = do
                    UTF8.toText (encode $ UTF8.fromText rawText)
   return $ B.imageWith (ident,cls,[]) svgData mempty mempty
 
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
-  let tagTest = flip elem . fmap fst $ elemToClass
-  TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+  TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
   result <- manyTill pAny (pCloses open)
   let (ids,cs,kvs) = toAttr attr'
-      cs'          = maybe cs (:cs) . lookup open $ elemToClass
+      cs'          = class' : cs
   return . B.codeWith (ids,cs',kvs) .
     T.unwords . T.lines . innerText $ result
 
@@ -764,17 +834,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -868,27 +940,23 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
                then return B.softbreak
                else return B.space
 
-class NamedTag a where
-  getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
-  getTagName (TagOpen t _) = Just t
-  getTagName (TagClose t)  = Just t
-  getTagName _             = Nothing
-
-instance NamedTag (Tag String) where
-  getTagName (TagOpen t _) = Just (T.pack t)
-  getTagName (TagClose t)  = Just (T.pack t)
-  getTagName _             = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t =
-  isCommentTag t || case getTagName t of
-                           Nothing  -> False
-                           Just x   -> x `Set.notMember` blockTags ||
-                                       T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t)  = Just t
+getTagName _             = Nothing
+
+isInlineTag :: Tag Text -> Bool
+isInlineTag t = isCommentTag t || case t of
+  TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+  TagClose "script"  -> True
+  TagOpen name _     -> isInlineTagName name
+  TagClose name      -> isInlineTagName name
+  _                  -> False
+ where isInlineTagName x =
+         x `Set.notMember` blockTags ||
+         T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
                  where isBlockTagName =
                          case getTagName t of
@@ -899,10 +967,10 @@ isBlockTag t = isBlockTagName || isTagComment t
                                     || x `Set.member` eitherBlockOrInline
                               Nothing -> False
 
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
 isTextTag = tagText (const True)
 
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
 isCommentTag = tagComment (const True)
 
 --- parsers for use in markdown, textile readers
@@ -910,13 +978,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -951,22 +1020,24 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
         go n (t:ts') = (t :) <$> go n ts'
         go _ [] = mzero
 
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
 hasTagWarning (TagWarning _:_) = True
 hasTagWarning _                = False
 
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
@@ -1024,21 +1095,6 @@ htmlTag f = try $ do
          handleTag tagname
        _ -> mzero
 
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
-    TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
-  if T.null t then s else T.drop 1 t
-  where (_, t) = T.span (/= ':') s
-
 -- Utilities
 
 -- | Adjusts a url according to the document's base URL.
@@ -1048,26 +1104,3 @@ canonicalizeUrl url = do
   return $ case (parseURIReference (T.unpack url), mbBaseHref) of
                 (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
                 _                   -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
-               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
-                                                       "meta", "link"]
-                            , optRawTag   = matchTags ["script", "style"] }
-              where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types =  -- Document divisions
-   map (\s -> (s, (["section", "body"], 0)))
-    ["volume", "part", "chapter", "division"]
-  <> -- Document section and components
-  [
-    ("abstract",  ([], 0))]
--}
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 2d58319da..bd8d7c96c 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -161,10 +161,12 @@ _ `closes` "html" = False
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
 "td" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True
 "dd" `closes` t | t `elem` ["dt", "dd"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
 "rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"col" `closes` "col" = True
+"colgroup" `closes` "col" = True
 "optgroup" `closes` "optgroup" = True
 "optgroup" `closes` "option" = True
 "option" `closes` "option" = True
@@ -193,14 +195,20 @@ t1 `closes` t2 |
 _ `closes` _ = False
 
 toStringAttr :: [(Text, Text)] -> [(Text, Text)]
-toStringAttr = map go
+toStringAttr = foldr go []
   where
-   go (x,y) =
-     case T.stripPrefix "data-" x of
-       Just x' | x' `Set.notMember` (html5Attributes <>
-                                     html4Attributes <> rdfaAttributes)
-         -> (x',y)
-       _ -> (x,y)
+   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
+   -- treat xml:lang as lang
+   go ("xml:lang",y) ats = go ("lang",y) ats
+   -- prevent duplicate attributes
+   go (x,y) ats
+     | any (\(x',_) -> x == x') ats = ats
+     | otherwise      =
+        case T.stripPrefix "data-" x of
+          Just x' | x' `Set.notMember` (html5Attributes <>
+                                        html4Attributes <> rdfaAttributes)
+            -> go (x',y) ats
+          _ -> (x,y):ats
 
 -- Unlike fromAttrib from tagsoup, this distinguishes
 -- between a missing attribute and an attribute with empty content.
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 5a783988f..6e62e12f5 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
-   Copyright   : © 2006-2020 John MacFarlane,
-                   2020 Albert Krewinkel
+   Copyright   : © 2006-2021 John MacFarlane,
+                   2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
@@ -17,6 +17,8 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks)
@@ -32,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used:  Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
 pCol = try $ do
   TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
   let attribs = toStringAttr attribs'
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
+  return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
+                    maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
+                      $ safeRead (T.filter
+                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> Right ColWidthDefault
+                Just (T.unsnoc -> Just (xs, '*')) ->
+                  maybe (Left 1) Left $ safeRead xs
                 Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+                  maybe (Right ColWidthDefault)
+                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
+                _ -> Right ColWidthDefault
 
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
 pColgroup = try $ do
   pSatisfy (matchTagOpen "colgroup" [])
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+  let remaining = 1 - sum (map getColWidth $ rights ws)
+      relatives = sum $ lefts ws
+      relUnit = remaining / fromIntegral relatives
+      toColWidth (Right x) = x
+      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+  in  map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
 data CellType
   = HeaderCell
   | BodyCell
@@ -181,7 +200,8 @@ pTable :: PandocMonad m
 pTable block = try $ do
   TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
   caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
-  widths  <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+  widths <- resolveRelativeLengths <$>
+              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
   thead   <- pTableHead block               <* skipMany pBlank
   topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
   tbodies <- many (pTableBody block)        <* skipMany pBlank
@@ -214,8 +234,9 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
           -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
 normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
-  let rowLength = length . rowCells
-  let ncols = maximum (map rowLength rows)
+  let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
+  let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
+  let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
   if null rows
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.TagCategories
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Types
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 25d69f040..67b3af2d3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -17,8 +17,9 @@ module Text.Pandoc.Readers.Haddock
 
 import Control.Monad.Except (throwError)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -28,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
@@ -92,7 +95,7 @@ docHToBlocks d' =
                   then ([], map toCells bodyRows)
                   else (toCells (head headerRows),
                         map toCells (tail headerRows ++ bodyRows))
-             colspecs = replicate (maximum (map length body))
+             colspecs = replicate (maybe 0 maximum (nonEmpty (map length body)))
                              (AlignDefault, ColWidthDefault)
          in  B.table B.emptyCaption
                      colspecs
@@ -128,7 +131,8 @@ docHToInlines isCode d' =
           DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
           _               -> mempty
     DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
-    DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
+    DocModule s -> B.codeWith ("",["haskell","module"],[]) $
+                   T.pack (modLinkName s)
     DocWarning _ -> mempty -- TODO
     DocEmphasis d -> B.emph (docHToInlines isCode d)
     DocMonospaced (DocString s) -> B.code $ T.pack s
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,7 +14,9 @@ Conversion of JATS XML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
-import Data.Char (isDigit, isSpace, toUpper)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
+import Data.Char (isDigit, isSpace)
 import Data.Default
 import Data.Generics
 import Data.List (foldl', intersperse)
@@ -22,15 +24,17 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -49,42 +53,28 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  let sources = toSources inp
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr =
   fromMaybe "" . maybeAttrValue attr
 
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
 maybeAttrValue attr elt =
-  T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+  lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
 
 -- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
 named s e = qName (elName e) == s
 
 --
@@ -150,10 +140,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> JATS m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "p" -> parseMixed para (elContent e)
@@ -202,7 +192,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ textContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -266,7 +256,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = do
-                            w <- findAttrText (unqual "colwidth") c
+                            w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = foldl' max 0 $ map length bodyrows
@@ -437,16 +427,10 @@ parseRef e = do
        Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
        -- TODO handle mixed-citation
 
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
 textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -455,9 +439,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
-  return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack)
+                                $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
@@ -502,9 +486,9 @@ parseInline (Elem e) =
                         else linkWith attr ("#" <> rid) "" ils
         "ext-link" -> do
              ils <- innerInlines id
-             let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+             let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "rid" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, [], [])
@@ -512,7 +496,8 @@ parseInline (Elem e) =
 
         "disp-formula" -> formula displayMath
         "inline-formula" -> formula math
-        "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+        "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
+                   -> return . math $ mathML e
         "tex-math" -> return . math $ textContent e
 
         "email" -> return $ link ("mailto:" <> textContent e) ""
@@ -524,7 +509,7 @@ parseInline (Elem e) =
    where innerInlines f = extractSpaces f . mconcat <$>
                           mapM parseInline (elContent e)
          mathML x =
-            case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+            case readMathML . showElement $ everywhere (mkT removePrefix) x of
                 Left _ -> mempty
                 Right m -> writeTeX m
          formula constructor = do
@@ -535,11 +520,12 @@ parseInline (Elem e) =
                             filterChildren isMathML whereToLook
             return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
 
-         isMathML x = qName   (elName x) == "math" &&
-                      qPrefix (elName x) == Just "mml"
+         isMathML x = qName (elName x) == "math" &&
+                      qURI  (elName x) ==
+                                      Just "http://www.w3.org/1998/Math/MathML"
          removePrefix elname = elname { qPrefix = Nothing }
          codeWithLang = do
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..cf111f173 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
@@ -71,10 +73,10 @@ toPandocCodeBlocks langMay params txt =
                   Nothing                   -> []
   in codeBlockWith ("", classes, map paramToPair params) txt
 
--- | Create a pandoc @'Div'@
+-- | Create a pandoc @'Div'@ from a panel.
 toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks
 toPandocDiv params =
-  divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks
+  divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks
 
 paramToPair :: Jira.Parameter -> (Text, Text)
 paramToPair (Jira.Parameter key value) = (key, value)
@@ -170,6 +172,8 @@ jiraLinkToPandoc linkType alias url =
     Jira.Email      -> link ("mailto:" <> url') "" alias'
     Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias'
     Jira.User       -> linkWith ("", ["user-account"], []) url' "" alias'
+    Jira.SmartCard  -> linkWith ("", ["smart-card"], []) url' "" alias'
+    Jira.SmartLink  -> linkWith ("", ["smart-link"], []) url' "" alias'
 
 -- | Get unicode representation of a Jira icon.
 iconUnicode :: Jira.Icon -> Text
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index cdccaa535..27c018e73 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,14 +1,10 @@
-{-# LANGUAGE BangPatterns          #-}
-{-# LANGUAGE CPP                   #-}
-{-# LANGUAGE FlexibleInstances     #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -22,50 +18,58 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    applyMacros,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
-                                   inlineCommand,
-                                   tokenize,
-                                   untokenize
+                                   inlineCommand
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toUpper, chr)
+import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr)
 import Data.Default
-import Data.Functor (($>))
 import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
 import Data.Text (Text)
 import qualified Data.Text as T
+import Skylighting (defaultSyntaxMap)
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Collate.Lang (renderLang)
+import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report, setResourcePath,
-                                      setTranslations, translateTerm)
+                                      readFileFromDirs, report,
+                                      setResourcePath)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
-                                       babelLangToBCP47)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+                                       inlineEnvironment,
+                                       mathDisplay, mathInline,
+                                       newtheorem, theoremstyle, proof,
+                                       theoremEnvironment)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+                                       enquoteCommands,
+                                       babelLangToBCP47, setDefaultLanguage)
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+                                         nameCommands, charCommands,
+                                         accentCommands,
+                                         biblatexInlineCommands,
+                                         verbCommands, rawInlineOr,
+                                         listingsLanguage)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
-import Safe
+import Data.List.NonEmpty (nonEmpty)
 
 -- for debugging:
 -- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -73,16 +77,17 @@ import Safe
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -93,11 +98,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-#if MIN_VERSION_safe(0,3,18)
-  let bottomLevel = minimumBound 1 $ query headerLevel doc'
-#else
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
-#endif
+  let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
@@ -132,11 +133,10 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,14 +163,13 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   raw <- snd <$>
           (   rawLaTeXParser toks True
-              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
               inlines
           <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
               inlines
@@ -178,11 +177,10 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -191,12 +189,6 @@ inlineCommand = do
 word :: PandocMonad m => LP m Inlines
 word = str . untoken <$> satisfyTok isWordTok
 
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
-  where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
-        isRegularSymbol _                = False
-        isSpecial c = c `Set.member` specialChars
-
 inlineGroup :: PandocMonad m => LP m Inlines
 inlineGroup = do
   ils <- grouped inline
@@ -237,19 +229,6 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
-
-lit :: Text -> LP m Inlines
-lit = pure . str
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,23 +275,14 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
-  skipopts
-  let lang = mblang >>= babelLangToBCP47
-  let langspan = case lang of
-                      Nothing -> id
-                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
-  quoteContext <- sQuoteContext <$> getState
-  if starred || quoteContext == InDoubleQuote
-     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+lit :: Text -> LP m Inlines
+lit = pure . str
 
 blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
                 then (\xs -> para (cite xs mempty))
-                       <$> cites NormalCitation False
+                       <$> cites inline NormalCitation False
                 else option mempty $ para <$> bracketed inline
   let lang = mblang >>= babelLangToBCP47
   let langdiv = case lang of
@@ -323,224 +293,13 @@ blockquote cvariant mblang = do
   optional $ symbolIn (".:;?!" :: [Char])  -- currently ignored
   return $ blockQuote . langdiv $ (bs <> citepar)
 
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
-  acro <- braced
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "singular+" <> form)])
-    $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
-  acro <- braced
-  plural <- lit "s"
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "plural+" <> form)]) $
-   mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  withVerbatimMode $
-    code . untokenize <$>
-      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
-  t@(Tok pos toktype txt) <- anyTok
-  case T.findIndex (== stopchar) txt of
-       Nothing -> return t
-       Just i  -> do
-         let (t1, t2) = T.splitAt i txt
-         inp <- getInput
-         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
-                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
-         return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
-  case lookup "language" opts of
-    Nothing  -> Nothing
-    Just l   -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
-  options <- option [] keyvals
-  let classes = maybeToList $ listingsLanguage options
-  doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
-  skipopts
-  cls <- untokenize <$> braced
-  doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  let stopchar = if marker == '{' then '}' else marker
-  withVerbatimMode $
-    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
-      manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x    = x
-
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
-  symbol '$'
-  display <- option False (True <$ symbol '$')
-  (do contents <- try $ untokenize <$> pDollarsMath 0
-      if display
-         then mathDisplay contents <$ symbol '$'
-         else return $ mathInline contents)
-   <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
-  tk@(Tok _ toktype t) <- anyTok
-  case toktype of
-       Symbol | t == "$"
-              , n == 0 -> return []
-              | t == "\\" -> do
-                  tk' <- anyTok
-                  (tk :) . (tk' :) <$> pDollarsMath n
-              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
-              | t == "}" ->
-                if n > 0
-                then (tk :) <$> pDollarsMath (n-1)
-                else mzero
-       _ -> (tk :) <$> pDollarsMath n
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _      = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
-  let k = last ks
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ toList <$> opt
-  second <- optionMaybe $ toList <$> opt
-  keys <- try $ bgroup *> manyTill citationLabel egroup
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> (mempty, s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> (mempty, mempty)
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel  = do
-  sp
-  untokenize <$>
-    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
-          <* sp
-          <* optional (symbol ',')
-          <* sp)
-  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
-  cits <- if multi
-             then do
-               multiprenote <- optionMaybe $ toList <$> paropt
-               multipostnote <- optionMaybe $ toList <$> paropt
-               let (pre, suf) = case (multiprenote, multipostnote) of
-                     (Just s , Nothing) -> (mempty, s)
-                     (Nothing , Just t) -> (mempty, t)
-                     (Just s , Just t ) -> (s, t)
-                     _                  -> (mempty, mempty)
-               tempCits <- many1 simpleCiteArgs
-               case tempCits of
-                 (k:ks) -> case ks of
-                             (_:_) -> return $ (addMprenote pre k : init ks) ++
-                                                 [addMpostnote suf (last ks)]
-                             _ -> return [addMprenote pre (addMpostnote suf k)]
-                 _ -> return [[]]
-             else count 1 simpleCiteArgs
-  let cs = concat cits
-  return $ case mode of
-        AuthorInText -> case cs of
-                             (c:rest) -> c {citationMode = mode} : rest
-                             []       -> []
-        _            -> map (\a -> a {citationMode = mode}) cs
-  where mprenote (k:ks) = (k:ks) ++ [Space]
-        mprenote _ = mempty
-        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
-        mpostnote _ = mempty
-        addMprenote mpn (k:ks) =
-          let mpnfinal = case citationPrefix k of
-                           (_:_) -> mprenote mpn
-                           _ -> mpn
-          in addPrefix mpnfinal (k:ks)
-        addMprenote _ _ = []
-        addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
-  (c,raw) <- withRaw $ cites mode multi
-  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
-  let isCite Cite{} = True
-      isCite _      = False
-      (pref, rest) = break isCite (toList ils)
-  in case rest of
-          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
-          _                -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
-  (cs, raw) <-
-    withRaw $ concat <$> do
-      bgroup
-      items <- mconcat <$>
-                many1 (notFollowedBy (symbol ';') >> inline)
-                  `sepBy1` symbol ';'
-      egroup
-      return $ map handleCitationPart items
-  case cs of
-       []       -> mzero
-       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
-                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
-  note $ para $ ils <> str "."
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
   guard $ name /= "begin" && name /= "end" && name /= "and"
-  star <- option "" ("*" <$ symbol '*' <* sp)
+  star <- if T.all isAlphaNum name
+             then option "" ("*" <$ symbol '*' <* sp)
+             else pure ""
   overlay <- option "" overlaySpecification
   let name' = name <> star <> overlay
   let names = ordNub [name', name] -- check non-starred as fallback
@@ -551,28 +310,8 @@ inlineCommand' = try $ do
          <|> ignore rawcommand
   lookupListDefault raw names inlineCommands
 
-
 tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
-  where singleChar' = do
-          Tok _ _ t <- singleChar
-          return $ str t
-
-opt :: PandocMonad m => LP m Inlines
-opt = do
-  toks <- try (sp *> bracketedToks <* sp)
-  -- now parse the toks as inlines
-  st <- getState
-  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
-  case parsed of
-    Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
+tok = tokWith inline
 
 unescapeURL :: Text -> Text
 unescapeURL = T.concat . go . T.splitOn "\\"
@@ -585,381 +324,109 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       , isEscapable c = t
       | otherwise = "\\" <> t
 
-mathEnvWith :: PandocMonad m
-            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
-   where inner x = case innerEnv of
-                        Nothing -> x
-                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
-                                   "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
-  skipopts
-  optional blankline
-  res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
-  controlSeq "begin"
-  name <- untokenize <$> braced
-  M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
-    ("displaymath", mathEnvWith id Nothing "displaymath")
-  , ("math", math <$> mathEnv "math")
-  , ("equation", mathEnvWith id Nothing "equation")
-  , ("equation*", mathEnvWith id Nothing "equation*")
-  , ("gather", mathEnvWith id (Just "gathered") "gather")
-  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
-  , ("multline", mathEnvWith id (Just "gathered") "multline")
-  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
-  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
-  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
-  , ("align", mathEnvWith id (Just "aligned") "align")
-  , ("align*", mathEnvWith id (Just "aligned") "align*")
-  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
-  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
-  , ("dmath", mathEnvWith id Nothing "dmath")
-  , ("dmath*", mathEnvWith id Nothing "dmath*")
-  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
-  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
-  , ("darray", mathEnvWith id (Just "aligned") "darray")
-  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
-  ]
-
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
-  [ ("emph", extractSpaces emph <$> tok)
-  , ("textit", extractSpaces emph <$> tok)
-  , ("textsl", extractSpaces emph <$> tok)
-  , ("textsc", extractSpaces smallcaps <$> tok)
-  , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
-  , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
-  , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
-  , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-  , ("texttt", ttfamily)
-  , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-  , ("lq", return (str "‘"))
-  , ("rq", return (str "’"))
-  , ("textquoteleft", return (str "‘"))
-  , ("textquoteright", return (str "’"))
-  , ("textquotedblleft", return (str "“"))
-  , ("textquotedblright", return (str "”"))
-  , ("textsuperscript", extractSpaces superscript <$> tok)
-  , ("textsubscript", extractSpaces subscript <$> tok)
-  , ("textbackslash", lit "\\")
-  , ("backslash", lit "\\")
-  , ("slash", lit "/")
-  , ("textbf", extractSpaces strong <$> tok)
-  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
-  , ("underline", underline <$> tok)
-  , ("ldots", lit "…")
-  , ("vdots", lit "\8942")
-  , ("dots", lit "…")
-  , ("mdots", lit "…")
-  , ("sim", lit "~")
-  , ("sep", lit ",")
-  , ("label", rawInlineOr "label" dolabel)
-  , ("ref", rawInlineOr "ref" $ doref "ref")
-  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
-  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
-  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
-  , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
-  , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
-  , ("lettrine", rawInlineOr "lettrine" lettrine)
-  , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
-  , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
-  , ("ensuremath", mathInline . untokenize <$> braced)
-  , ("texorpdfstring", const <$> tok <*> tok)
-  , ("P", lit "¶")
-  , ("S", lit "§")
-  , ("$", lit "$")
-  , ("%", lit "%")
-  , ("&", lit "&")
-  , ("#", lit "#")
-  , ("_", lit "_")
-  , ("{", lit "{")
-  , ("}", lit "}")
-  , ("qed", lit "\a0\x25FB")
-  -- old TeX commands
-  , ("em", extractSpaces emph <$> inlines)
-  , ("it", extractSpaces emph <$> inlines)
-  , ("sl", extractSpaces emph <$> inlines)
-  , ("bf", extractSpaces strong <$> inlines)
-  , ("tt", code . stringify . toList <$> inlines)
-  , ("rm", inlines)
-  , ("itshape", extractSpaces emph <$> inlines)
-  , ("slshape", extractSpaces emph <$> inlines)
-  , ("scshape", extractSpaces smallcaps <$> inlines)
-  , ("bfseries", extractSpaces strong <$> inlines)
-  , ("MakeUppercase", makeUppercase <$> tok)
-  , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
-  , ("uppercase", makeUppercase <$> tok)
-  , ("MakeLowercase", makeLowercase <$> tok)
-  , ("MakeTextLowercase", makeLowercase <$> tok)
-  , ("lowercase", makeLowercase <$> tok)
-  , ("/", pure mempty) -- italic correction
-  , ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
-  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
-                            guard $ not inTableCell
-                            optional opt
-                            spaces))
-  , (",", lit "\8198")
-  , ("@", pure mempty)
-  , (" ", lit "\160")
-  , ("ps", pure $ str "PS." <> space)
-  , ("TeX", lit "TeX")
-  , ("LaTeX", lit "LaTeX")
-  , ("bar", lit "|")
-  , ("textless", lit "<")
-  , ("textgreater", lit ">")
-  , ("thanks", skipopts >> note <$> grouped block)
-  , ("footnote", skipopts >> note <$> grouped block)
-  , ("passthrough", tok) -- \passthrough macro used by latex writer
-                         -- for listings
-  , ("verb", doverb)
-  , ("lstinline", dolstinline)
-  , ("mintinline", domintinline)
-  , ("Verb", doverb)
-  , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                  bracedUrl)
-  , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
-  , ("href", do url <- bracedUrl
-                sp
-                link (unescapeURL $ untokenize url) "" <$> tok)
-  , ("includegraphics", do options <- option [] keyvals
-                           src <- braced
-                           mkImage options . unescapeURL . removeDoubleQuotes $
-                               untokenize src)
-  , ("enquote*", enquote True Nothing)
-  , ("enquote", enquote False Nothing)
-  -- foreignquote is supposed to use native quote marks
-  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-  , ("foreignquote", braced >>= enquote False . Just . untokenize)
-  -- hypehnquote uses regular quotes
-  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
-  , ("figurename", doTerm Translations.Figure)
-  , ("prefacename", doTerm Translations.Preface)
-  , ("refname", doTerm Translations.References)
-  , ("bibname", doTerm Translations.Bibliography)
-  , ("chaptername", doTerm Translations.Chapter)
-  , ("partname", doTerm Translations.Part)
-  , ("contentsname", doTerm Translations.Contents)
-  , ("listfigurename", doTerm Translations.ListOfFigures)
-  , ("listtablename", doTerm Translations.ListOfTables)
-  , ("indexname", doTerm Translations.Index)
-  , ("abstractname", doTerm Translations.Abstract)
-  , ("tablename", doTerm Translations.Table)
-  , ("enclname", doTerm Translations.Encl)
-  , ("ccname", doTerm Translations.Cc)
-  , ("headtoname", doTerm Translations.To)
-  , ("pagename", doTerm Translations.Page)
-  , ("seename", doTerm Translations.See)
-  , ("seealsoname", doTerm Translations.SeeAlso)
-  , ("proofname", doTerm Translations.Proof)
-  , ("glossaryname", doTerm Translations.Glossary)
-  , ("lstlistingname", doTerm Translations.Listing)
-  , ("cite", citation "cite" NormalCitation False)
-  , ("Cite", citation "Cite" NormalCitation False)
-  , ("citep", citation "citep" NormalCitation False)
-  , ("citep*", citation "citep*" NormalCitation False)
-  , ("citeal", citation "citeal" NormalCitation False)
-  , ("citealp", citation "citealp" NormalCitation False)
-  , ("citealp*", citation "citealp*" NormalCitation False)
-  , ("autocite", citation "autocite" NormalCitation False)
-  , ("smartcite", citation "smartcite" NormalCitation False)
-  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
-  , ("parencite", citation "parencite" NormalCitation False)
-  , ("supercite", citation "supercite" NormalCitation False)
-  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
-  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
-  , ("citeyear", citation "citeyear" SuppressAuthor False)
-  , ("autocite*", citation "autocite*" SuppressAuthor False)
-  , ("cite*", citation "cite*" SuppressAuthor False)
-  , ("parencite*", citation "parencite*" SuppressAuthor False)
-  , ("textcite", citation "textcite" AuthorInText False)
-  , ("citet", citation "citet" AuthorInText False)
-  , ("citet*", citation "citet*" AuthorInText False)
-  , ("citealt", citation "citealt" AuthorInText False)
-  , ("citealt*", citation "citealt*" AuthorInText False)
-  , ("textcites", citation "textcites" AuthorInText True)
-  , ("cites", citation "cites" NormalCitation True)
-  , ("autocites", citation "autocites" NormalCitation True)
-  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
-  , ("parencites", citation "parencites" NormalCitation True)
-  , ("supercites", citation "supercites" NormalCitation True)
-  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
-  , ("Autocite", citation "Autocite" NormalCitation False)
-  , ("Smartcite", citation "Smartcite" NormalCitation False)
-  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
-  , ("Parencite", citation "Parencite" NormalCitation False)
-  , ("Supercite", citation "Supercite" NormalCitation False)
-  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
-  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
-  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
-  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
-  , ("Cite*", citation "Cite*" SuppressAuthor False)
-  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
-  , ("Textcite", citation "Textcite" AuthorInText False)
-  , ("Textcites", citation "Textcites" AuthorInText True)
-  , ("Cites", citation "Cites" NormalCitation True)
-  , ("Autocites", citation "Autocites" NormalCitation True)
-  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
-  , ("Parencites", citation "Parencites" NormalCitation True)
-  , ("Supercites", citation "Supercites" NormalCitation True)
-  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
-  , ("citetext", complexNatbibCitation NormalCitation)
-  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
-                        complexNatbibCitation AuthorInText)
-                   <|> citation "citeauthor" AuthorInText False)
-  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
-                          addMeta "nocite"))
-  , ("hyperlink", hyperlink)
-  , ("hypertarget", hypertargetInline)
-  -- glossaries package
-  , ("gls", doAcronym "short")
-  , ("Gls", doAcronym "short")
-  , ("glsdesc", doAcronym "long")
-  , ("Glsdesc", doAcronym "long")
-  , ("GLSdesc", doAcronym "long")
-  , ("acrlong", doAcronym "long")
-  , ("Acrlong", doAcronym "long")
-  , ("acrfull", doAcronym "full")
-  , ("Acrfull", doAcronym "full")
-  , ("acrshort", doAcronym "abbrv")
-  , ("Acrshort", doAcronym "abbrv")
-  , ("glspl", doAcronymPlural "short")
-  , ("Glspl", doAcronymPlural "short")
-  , ("glsdescplural", doAcronymPlural "long")
-  , ("Glsdescplural", doAcronymPlural "long")
-  , ("GLSdescplural", doAcronymPlural "long")
-  -- acronyms package
-  , ("ac", doAcronym "short")
-  , ("acf", doAcronym "full")
-  , ("acs", doAcronym "abbrv")
-  , ("acl", doAcronym "long")
-  , ("acp", doAcronymPlural "short")
-  , ("acfp", doAcronymPlural "full")
-  , ("acsp", doAcronymPlural "abbrv")
-  , ("aclp", doAcronymPlural "long")
-  , ("Ac", doAcronym "short")
-  , ("Acf", doAcronym "full")
-  , ("Acs", doAcronym "abbrv")
-  , ("Acl", doAcronym "long")
-  , ("Acp", doAcronymPlural "short")
-  , ("Acfp", doAcronymPlural "full")
-  , ("Acsp", doAcronymPlural "abbrv")
-  , ("Aclp", doAcronymPlural "long")
-  -- siuntix
-  , ("si", skipopts *> dosi tok)
-  , ("SI", doSI tok)
-  , ("SIrange", doSIrange True tok)
-  , ("numrange", doSIrange False tok)
-  , ("numlist", doSInumlist)
-  , ("num", doSInum)
-  , ("ang", doSIang)
-  -- hyphenat
-  , ("bshyp", lit "\\\173")
-  , ("fshyp", lit "/\173")
-  , ("dothyp", lit ".\173")
-  , ("colonhyp", lit ":\173")
-  , ("hyp", lit "-")
-  , ("nohyphens", tok)
-  , ("textnhtt", ttfamily)
-  , ("nhttfamily", ttfamily)
-  -- LaTeX colors
-  , ("textcolor", coloredInline "color")
-  , ("colorbox", coloredInline "background-color")
-  -- fontawesome
-  , ("faCheck", lit "\10003")
-  , ("faClose", lit "\10007")
-  -- xspace
-  , ("xspace", doxspace)
-  -- etoolbox
-  , ("ifstrequal", ifstrequal)
-  , ("newtoggle", braced >>= newToggle)
-  , ("toggletrue", braced >>= setToggle True)
-  , ("togglefalse", braced >>= setToggle False)
-  , ("iftoggle", try $ ifToggle >> inline)
-  -- biblatex misc
-  , ("RN", romanNumeralUpper)
-  , ("Rn", romanNumeralLower)
-  -- babel
-  , ("foreignlanguage", foreignlanguage)
-  -- include
-  , ("input", rawInlineOr "input" $ include "input")
-  -- soul package
-  , ("ul", underline <$> tok)
-  -- ulem package
-  , ("uline", underline <$> tok)
-  -- plain tex stuff that should just be passed through as raw tex
-  , ("ifdim", ifdim)
-   -- stackengine
-   , ("addstackgap", skipopts *> tok)
-  ]
-
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
+inlineCommands = M.unions
+  [ accentCommands tok
+  , citationCommands inline
+  , siunitxCommands tok
+  , acronymCommands
+  , refCommands
+  , nameCommands
+  , verbCommands
+  , charCommands
+  , enquoteCommands tok
+  , inlineLanguageCommands tok
+  , biblatexInlineCommands tok
+  , rest ]
+ where
+  rest = M.fromList
+    [ ("emph", extractSpaces emph <$> tok)
+    , ("textit", extractSpaces emph <$> tok)
+    , ("textsl", extractSpaces emph <$> tok)
+    , ("textsc", extractSpaces smallcaps <$> tok)
+    , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+    , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+    , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+    , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+    , ("texttt", ttfamily)
+    , ("sout", extractSpaces strikeout <$> tok)
+    , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+    , ("textsuperscript", extractSpaces superscript <$> tok)
+    , ("textsubscript", extractSpaces subscript <$> tok)
+    , ("textbf", extractSpaces strong <$> tok)
+    , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+    , ("underline", underline <$> tok)
+    , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+    , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+    , ("lettrine", rawInlineOr "lettrine" lettrine)
+    , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+    , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+    , ("ensuremath", mathInline . untokenize <$> braced)
+    , ("texorpdfstring", const <$> tok <*> tok)
+    -- old TeX commands
+    , ("em", extractSpaces emph <$> inlines)
+    , ("it", extractSpaces emph <$> inlines)
+    , ("sl", extractSpaces emph <$> inlines)
+    , ("bf", extractSpaces strong <$> inlines)
+    , ("tt", code . stringify . toList <$> inlines)
+    , ("rm", inlines)
+    , ("itshape", extractSpaces emph <$> inlines)
+    , ("slshape", extractSpaces emph <$> inlines)
+    , ("scshape", extractSpaces smallcaps <$> inlines)
+    , ("bfseries", extractSpaces strong <$> inlines)
+    , ("MakeUppercase", makeUppercase <$> tok)
+    , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+    , ("uppercase", makeUppercase <$> tok)
+    , ("MakeLowercase", makeLowercase <$> tok)
+    , ("MakeTextLowercase", makeLowercase <$> tok)
+    , ("lowercase", makeLowercase <$> tok)
+    , ("thanks", skipopts >> note <$> grouped block)
+    , ("footnote", skipopts >> note <$> grouped block)
+    , ("passthrough", tok) -- \passthrough macro used by latex writer
+                           -- for listings
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options .
+                               unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
+    -- hyperref
+    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+                    bracedUrl)
+    , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+    , ("href", do url <- bracedUrl
+                  sp
+                  link (unescapeURL $ untokenize url) "" <$> tok)
+    , ("hyperlink", hyperlink)
+    , ("hyperref", hyperref)
+    , ("hypertarget", hypertargetInline)
+    -- hyphenat
+    , ("nohyphens", tok)
+    , ("textnhtt", ttfamily)
+    , ("nhttfamily", ttfamily)
+    -- LaTeX colors
+    , ("textcolor", coloredInline "color")
+    , ("colorbox", coloredInline "background-color")
+    -- etoolbox
+    , ("ifstrequal", ifstrequal)
+    , ("newtoggle", braced >>= newToggle)
+    , ("toggletrue", braced >>= setToggle True)
+    , ("togglefalse", braced >>= setToggle False)
+    , ("iftoggle", try $ ifToggle >> inline)
+    -- include
+    , ("input", rawInlineOr "input" $ include "input")
+    -- soul package
+    , ("ul", underline <$> tok)
+    -- ulem package
+    , ("uline", underline <$> tok)
+    -- plain tex stuff that should just be passed through as raw tex
+    , ("ifdim", ifdim)
+    -- stackengine
+    , ("addstackgap", skipopts *> tok)
+    ]
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
-  optional opt
+  optional rawopt
   x <- tok
   y <- tok
   return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -979,32 +446,18 @@ alterStr :: (Text -> Text) -> Inline -> Inline
 alterStr f (Str xs) = Str (f xs)
 alterStr _ x = x
 
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
-  babelLang <- untokenize <$> braced
-  case babelLangToBCP47 babelLang of
-       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
-       _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
-  where
-    mk (polyglossia, bcp47Func) =
-      ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  let lang = renderLang $ bcp47Func o
-  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
 hyperlink :: PandocMonad m => LP m Inlines
 hyperlink = try $ do
   src <- untokenize <$> braced
   lab <- tok
   return $ link ("#" <> src) "" lab
 
+hyperref :: PandocMonad m => LP m Inlines
+hyperref = try $ do
+  url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp))
+       <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl)
+  link url "" <$> tok
+
 hypertargetBlock :: PandocMonad m => LP m Blocks
 hypertargetBlock = try $ do
   ref <- untokenize <$> braced
@@ -1019,31 +472,6 @@ hypertargetInline = try $ do
   ils <- grouped inline
   return $ spanWith (ref, [], []) ils
 
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
-  str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
-  str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
-  where
-    inBraces = do
-      symbol '{'
-      spaces
-      res <- parser
-      spaces
-      symbol '}'
-      return res
-    parser = do
-      Tok _ Word s <- satisfyTok isWordTok
-      let (digits, rest) = T.span isDigit s
-      unless (T.null rest) $
-        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
-      safeRead digits
-
 newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
 newToggle name = do
   updateState $ \st ->
@@ -1074,9 +502,6 @@ ifToggle = do
                   report $ UndefinedToggle name' pos
   return ()
 
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
 ifstrequal :: (PandocMonad m, Monoid a) => LP m a
 ifstrequal = do
   str1 <- tok
@@ -1097,13 +522,6 @@ coloredInline stylename = do
 ttfamily :: PandocMonad m => LP m Inlines
 ttfamily = code . stringify . toList <$> tok
 
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
-  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
-  if parseRaw
-     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
-     else fallback
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
@@ -1154,79 +572,90 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-label :: PandocMonad m => LP m ()
-label = do
-  controlSeq "label"
-  t <- braced
-  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
-  v <- braced
-  let refstr = untokenize v
-  updateState $ \st ->
-    st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
-  v <- braced
-  let refstr = untokenize v
-  return $ linkWith ("",[],[ ("reference-type", cls)
-                           , ("reference", refstr)])
-                    ("#" <> refstr)
-                    ""
-                    (inBrackets $ str refstr)
-
 lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
 lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
 
 inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
-     <|> (space  <$ whitespace)
-     <|> (softbreak <$ endline)
-     <|> word
-     <|> macroDef (rawInline "latex")
-     <|> inlineCommand'
-     <|> inlineEnvironment
-     <|> inlineGroup
-     <|> (symbol '-' *>
-           option (str "-") (symbol '-' *>
-             option (str "–") (str "—" <$ symbol '-')))
-     <|> doubleQuote
-     <|> singleQuote
-     <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "”" <$ symbol '”')
-     <|> (str "’" <$ symbol '\'')
-     <|> (str "’" <$ symbol '’')
-     <|> (str "\160" <$ symbol '~')
-     <|> dollarsMath
-     <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-     <|> (str . T.singleton <$> primEscape)
-     <|> regularSymbol
-     <|> (do res <- symbolIn "#^'`\"[]&"
-             pos <- getPosition
-             let s = untoken res
-             report $ ParsingUnescaped s pos
-             return $ str s)
+inline = do
+  Tok pos toktype t <- lookAhead anyTok
+  let symbolAsString = str . untoken <$> anySymbol
+  let unescapedSymbolAsString =
+        do s <- untoken <$> anySymbol
+           report $ ParsingUnescaped s pos
+           return $ str s
+  case toktype of
+    Comment     -> mempty <$ comment
+    Spaces      -> space <$ whitespace
+    Newline     -> softbreak <$ endline
+    Word        -> word
+    Esc1        -> str . T.singleton <$> primEscape
+    Esc2        -> str . T.singleton <$> primEscape
+    Symbol      ->
+      case t of
+        "-"     -> symbol '-' *>
+                    option (str "-") (symbol '-' *>
+                      option (str "–") (str "—" <$ symbol '-'))
+        "'"     -> symbol '\'' *>
+                  option (str "’") (str  "”" <$ symbol '\'')
+        "~"     -> str "\160" <$ symbol '~'
+        "`"     -> doubleQuote <|> singleQuote <|> symbolAsString
+        "\""    -> doubleQuote <|> singleQuote <|> symbolAsString
+        "“"     -> doubleQuote <|> symbolAsString
+        "‘"     -> singleQuote <|> symbolAsString
+        "$"     -> dollarsMath <|> unescapedSymbolAsString
+        "|"     -> (guardEnabled Ext_literate_haskell *>
+                    symbol '|' *> doLHSverb) <|> symbolAsString
+        "{"     -> inlineGroup
+        "#"     -> unescapedSymbolAsString
+        "&"     -> unescapedSymbolAsString
+        "_"     -> unescapedSymbolAsString
+        "^"     -> unescapedSymbolAsString
+        "\\"    -> mzero
+        "}"     -> mzero
+        _       -> symbolAsString
+    CtrlSeq _   -> macroDef (rawInline "latex")
+                  <|> inlineCommand'
+                  <|> inlineEnvironment
+                  <|> inlineGroup
+    _           -> mzero
 
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
 
+opt :: PandocMonad m => LP m Inlines
+opt = do
+  toks <- try (sp *> bracketedToks <* sp)
+  -- now parse the toks as inlines
+  st <- getState
+  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
+  case parsed of
+    Right result -> return result
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
+
 -- block elements:
 
 preamble :: PandocMonad m => LP m Blocks
 preamble = mconcat <$> many preambleBlock
   where preambleBlock =  (mempty <$ spaces1)
                      <|> macroDef (rawBlock "latex")
+                     <|> filecontents
                      <|> (mempty <$ blockCommand)
                      <|> (mempty <$ braced)
                      <|> (do notFollowedBy (begin_ "document")
                              anyTok
                              return mempty)
 
+rule :: PandocMonad m => LP m Blocks
+rule = do
+  skipopts
+  width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok
+  _thickness <- tok
+  -- 0-width rules are used to fix spacing issues:
+  case safeRead width of
+    Just (0 :: Double) -> return mempty
+    _ -> return horizontalRule
+
 paragraph :: PandocMonad m => LP m Blocks
 paragraph = do
   x <- trimInlines . mconcat <$> many1 inline
@@ -1264,6 +693,16 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
 insertIncluded :: PandocMonad m
                => FilePath
                -> FilePath
@@ -1273,13 +712,12 @@ insertIncluded defaultExtension f' = do
                 ".tex" -> f'
                 ".sty" -> f'
                 _      -> addExtension f' defaultExtension
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
   pos <- getPosition
   containers <- getIncludeFiles <$> getState
   when (T.pack f `elem` containers) $
     throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
   updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromDirs dirs f
+  mbcontents <- readFileFromTexinputs f
   contents <- case mbcontents of
                    Just s -> return s
                    Nothing -> do
@@ -1288,10 +726,6 @@ insertIncluded defaultExtension f' = do
   getInput >>= setInput . (tokenize f contents ++)
   updateState dropLatestIncludeFile
 
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
-   st{ sMeta = addMetaField field val $ sMeta st }
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
@@ -1300,150 +734,6 @@ authors = try $ do
   egroup
   addMeta "author" (map trimInlines auths)
 
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
-    (_, s) <- withRaw (commandDef <|> environmentDef)
-    (constructor (untokenize s) <$
-      guardDisabled Ext_latex_macros)
-     <|> return mempty
-  where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
-          guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
-        environmentDef = do
-          mbenv <- newenvironment
-          case mbenv of
-            Nothing -> return ()
-            Just (name, macro1, macro2) ->
-              guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
-        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
-        -- is equivalent to
-        -- @\newcommand{\envname}[n-args][default]{begin}@
-        -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
-  controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    optional $ symbol '='
-    spaces
-    -- we first parse in verbatim mode, and then expand macros,
-    -- because we don't want \let\foo\bar to turn into
-    -- \let\foo hello if we have previously \def\bar{hello}
-    contents <- bracedOrToken
-    return (name, contents)
-  contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
-  -- we use withVerbatimMode, because macros are to be expanded
-  -- at point of use, not point of definition
-  withVerbatimMode $ do
-    controlSeq "def"
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    argspecs <- many (argspecArg <|> argspecPattern)
-    contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
-  Tok _ (Arg i) _ <- satisfyTok isArgTok
-  return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
-  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
-                              (toktype' == Symbol || toktype' == Word) &&
-                              (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
-                             controlSeq "renewcommand" <|>
-                             controlSeq "providecommand" <|>
-                             controlSeq "DeclareMathOperator" <|>
-                             controlSeq "DeclareRobustCommand"
-  withVerbatimMode $ do
-    Tok _ (CtrlSeq name) txt <- do
-      optional (symbol '*')
-      anyControlSeq <|>
-        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    let argspecs = map ArgNum [1..numargs]
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    spaces
-    contents' <- bracedOrToken
-    let contents =
-         case mtype of
-              "DeclareMathOperator" ->
-                 Tok pos (CtrlSeq "mathop") "\\mathop"
-                 : Tok pos Symbol "{"
-                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
-                 : Tok pos Symbol "{"
-                 : (contents' ++
-                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
-              _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
-                             controlSeq "renewenvironment" <|>
-                             controlSeq "provideenvironment"
-  withVerbatimMode $ do
-    optional $ symbol '*'
-    spaces
-    name <- untokenize <$> braced
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    let argspecs = map (\i -> ArgNum i) [1..numargs]
-    startcontents <- spaces >> bracedOrToken
-    endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
-  ds <- untokenize <$> bracketedToks
-  case safeRead ds of
-       Just i -> return i
-       _      -> return 0
-
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
-  skipopts
-  ils <- tok
-  optional $ try $ spaces *> label
-  updateState $ \st -> st{ sCaption = Just ils }
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
@@ -1457,10 +747,6 @@ epigraph = do
   p2 <- grouped block
   return $ divWith ("", ["epigraph"], []) (p1 <> p2)
 
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption   = Nothing
-                                      , sLastLabel = Nothing }
-
 section :: PandocMonad m => Attr -> Int -> LP m Blocks
 section (ident, classes, kvs) lvl = do
   skipopts
@@ -1554,7 +840,7 @@ blockCommands = M.fromList
    , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
    , ("signature", mempty <$ (skipopts *> authors))
    , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-   , ("newtheorem", newtheorem)
+   , ("newtheorem", newtheorem inline)
    , ("theoremstyle", theoremstyle)
    -- KOMA-Script metadata commands
    , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1598,11 +884,11 @@ blockCommands = M.fromList
    --
    , ("hrule", pure horizontalRule)
    , ("strut", pure mempty)
-   , ("rule", skipopts *> tok *> tok $> horizontalRule)
+   , ("rule", rule)
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-   , ("caption", mempty <$ setCaption)
+   , ("caption", mempty <$ setCaption inline)
    , ("bibliography", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1640,7 +926,8 @@ blockCommands = M.fromList
 
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+   M.fromList
    [ ("document", env "document" blocks <* skipMany anyTok)
    , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
    , ("sloppypar", env "sloppypar" blocks)
@@ -1654,13 +941,6 @@ environments = M.fromList
    , ("flushright", divWith ("", ["flushright"], []) <$> env "flushright" blocks)
    , ("flushleft", divWith ("", ["flushleft"], []) <$> env "flushleft" blocks)
    , ("landscape", env "landscape" blocks)
-   , ("longtable",  env "longtable" $
-          resetCaption *> simpTable "longtable" False >>= addTableCaption)
-   , ("table",  env "table" $
-          skipopts *> resetCaption *> blocks >>= addTableCaption)
-   , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
-   , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
-   , ("tabular", env "tabular"  $ simpTable "tabular" False)
    , ("quote", blockQuote <$> env "quote" blocks)
    , ("quotation", blockQuote <$> env "quotation" blocks)
    , ("verse", blockQuote <$> env "verse" blocks)
@@ -1683,7 +963,7 @@ environments = M.fromList
    , ("lilypond", rawVerbEnv "lilypond")
    , ("ly", rawVerbEnv "ly")
    -- amsthm
-   , ("proof", proof)
+   , ("proof", proof blocks opt)
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -1692,130 +972,29 @@ environments = M.fromList
    , ("iftoggle", try $ ifToggle >> block)
    ]
 
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
-  stylename <- untokenize <$> braced
-  let mbstyle = case stylename of
-                  "plain"      -> Just PlainStyle
-                  "definition" -> Just DefinitionStyle
-                  "remark"     -> Just RemarkStyle
-                  _            -> Nothing
-  case mbstyle of
-    Nothing  -> return ()
-    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
-  return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
-  number <- option True (False <$ symbol '*' <* sp)
+filecontents :: PandocMonad m => LP m Blocks
+filecontents = try $ do
+  controlSeq "begin"
   name <- untokenize <$> braced
-  sp
-  series <- option Nothing $ Just . untokenize <$> bracketedToks
-  sp
-  showName <- tok
-  sp
-  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
-  sty <- sLastTheoremStyle <$> getState
-  let spec = TheoremSpec { theoremName = showName
-                         , theoremStyle = sty
-                         , theoremSeries = series
-                         , theoremSyncTo = syncTo
-                         , theoremNumber = number
-                         , theoremLastNum = DottedNum [0] }
-  tmap <- sTheoremMap <$> getState
-  updateState $ \s -> s{ sTheoremMap =
-                            M.insert name spec tmap }
+  guard $ name == "filecontents" || name == "filecontents*"
+  skipopts
+  fp <- untokenize <$> braced
+  txt <- verbEnv name
+  updateState $ \st ->
+    st{ sFileContents = M.insert fp txt (sFileContents st) }
   return mempty
 
-proof :: PandocMonad m => LP m Blocks
-proof = do
-  title <- option (B.text "Proof") opt
-  bs <- env "proof" blocks
-  return $
-    B.divWith ("", ["proof"], []) $
-      addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
-  case B.toList bs of
-    (Para xs : rest)
-      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
-    _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
-  case Seq.viewr (B.unMany bs) of
-    s Seq.:> Para ils
-      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
-    _ -> bs <> B.para qedSign
- where
-  qedSign = B.str "\xa0\x25FB"
-
 environment :: PandocMonad m => LP m Blocks
 environment = try $ do
   controlSeq "begin"
   name <- untokenize <$> braced
   M.findWithDefault mzero name environments <|>
-    theoremEnvironment name <|>
+    theoremEnvironment blocks opt name <|>
     if M.member name (inlineEnvironments
                        :: M.Map Text (LP PandocPure Inlines))
        then mzero
        else try (rawEnv name) <|> rawVerbEnv name
 
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
-  tmap <- sTheoremMap <$> getState
-  case M.lookup name tmap of
-    Nothing -> mzero
-    Just tspec -> do
-       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
-       bs <- env name blocks
-       number <-
-         if theoremNumber tspec
-            then do
-               let name' = fromMaybe name $ theoremSeries tspec
-               num <- getNextNumber
-                   (maybe (DottedNum [0]) theoremLastNum .
-                    M.lookup name' . sTheoremMap)
-               updateState $ \s ->
-                 s{ sTheoremMap =
-                       M.adjust
-                       (\spec -> spec{ theoremLastNum = num })
-                       name'
-                       (sTheoremMap s)
-                  }
-
-               case mblabel of
-                 Just ident ->
-                   updateState $ \s ->
-                     s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
-                 Nothing -> return ()
-               return $ space <> B.text (renderDottedNum num)
-            else return mempty
-       let titleEmph = case theoremStyle tspec of
-                         PlainStyle      -> B.strong
-                         DefinitionStyle -> B.strong
-                         RemarkStyle     -> B.emph
-       let title = titleEmph (theoremName tspec <> number)
-                      <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
-              $ case theoremStyle tspec of
-                  PlainStyle -> walk italicize bs
-                  _          -> bs
-
-italicize :: Block -> Block
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
@@ -1823,15 +1002,17 @@ rawEnv name = do
   rawOptions <- mconcat <$> many rawopt
   let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
   pos1 <- getPosition
-  (bs, raw) <- withRaw $ env name blocks
   if parseRaw
-     then return $ rawBlock "latex"
+     then do
+       (_, raw) <- withRaw $ env name blocks
+       return $ rawBlock "latex"
                  $ beginCommand <> untokenize raw
      else do
+       bs <- env name blocks
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-       return bs
+       return $ divWith ("",[name],[]) bs
 
 rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
 rawVerbEnv name = do
@@ -1890,8 +1071,7 @@ inputMinted = do
   pos <- getPosition
   attr <- mintedAttr
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   rawcode <- case mbCode of
                   Just s -> return s
                   Nothing -> do
@@ -1989,8 +1169,7 @@ inputListing = do
   pos <- getPosition
   options <- option [] keyvals
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   codeLines <- case mbCode of
                       Just s -> return $ T.lines s
                       Nothing -> do
@@ -1999,7 +1178,8 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap
+                                (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
@@ -2065,358 +1245,23 @@ orderedList' = try $ do
   bs <- listenv "enumerate" (many item)
   return $ orderedListWith (start, style, delim) bs
 
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
-  spaces
-  controlSeq "hline" <|>
-    -- booktabs rules:
-    controlSeq "toprule" <|>
-    controlSeq "bottomrule" <|>
-    controlSeq "midrule" <|>
-    controlSeq "endhead" <|>
-    controlSeq "endfirsthead"
-  spaces
-  optional opt
-  return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
-         <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
-  inp <- getInput
-  case inp of
-       (Tok spos Word t : rest) ->
-         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
-       _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
-  let maybeBar = skipMany
-        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
-  let cAlign = AlignCenter <$ symbol 'c'
-  let lAlign = AlignLeft <$ symbol 'l'
-  let rAlign = AlignRight <$ symbol 'r'
-  let parAlign = AlignLeft <$ symbol 'p'
-  -- aligns from tabularx
-  let xAlign = AlignLeft <$ symbol 'X'
-  let mAlign = AlignLeft <$ symbol 'm'
-  let bAlign = AlignLeft <$ symbol 'b'
-  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
-                                 <|> xAlign <|> mAlign <|> bAlign )
-  let alignPrefix = symbol '>' >> braced
-  let alignSuffix = symbol '<' >> braced
-  let colWidth = try $ do
-        symbol '{'
-        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
-        spaces
-        symbol '}'
-        return $ safeRead ds
-  let alignSpec = do
-        pref <- option [] alignPrefix
-        spaces
-        al <- alignChar
-        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
-                                                 pos <- getPosition
-                                                 report $ SkippedContent s pos
-                                                 return Nothing)
-        spaces
-        suff <- option [] alignSuffix
-        return (al, width, (pref, suff))
-  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
-        symbol '*'
-        spaces
-        ds <- trim . untokenize <$> braced
-        spaces
-        spec <- braced
-        case safeRead ds of
-             Just n  ->
-               getInput >>= setInput . (mconcat (replicate n spec) ++)
-             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
-  bgroup
-  spaces
-  maybeBar
-  aligns' <- many $ try $ spaces >> optional starAlign >>
-                            (alignSpec <* maybeBar)
-  spaces
-  egroup
-  spaces
-  return $ map toSpec aligns'
-  where
-    toColWidth (Just w) | w > 0 = ColWidth w
-    toColWidth _                = ColWidthDefault
-    toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
-              => Text   -- ^ table environment name
-              -> [([Tok], [Tok])] -- ^ pref/suffixes
-              -> LP m Row
-parseTableRow envname prefsufs = do
-  notFollowedBy (spaces *> end_ envname)
-  -- add prefixes and suffixes in token stream:
-  let celltoks (pref, suff) = do
-        prefpos <- getPosition
-        contents <- mconcat <$>
-            many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
-                  <|>
-                   snd <$> withRaw (inlineEnvironment <|> dollarsMath)
-                  <|>
-                   (do notFollowedBy
-                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
-                       count 1 anyTok) )
-
-        suffpos <- getPosition
-        option [] (count 1 amp)
-        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
-  rawcells <- mapM celltoks prefsufs
-  oldInput <- getInput
-  cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
-  setInput oldInput
-  spaces
-  return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
-  spaces
-  updateState $ \st -> st{ sInTableCell = True }
-  cell' <-   multicolumnCell
-         <|> multirowCell
-         <|> parseSimpleCell
-         <|> parseEmptyCell
-  updateState $ \st -> st{ sInTableCell = False }
-  spaces
-  return cell'
-  where
-    -- The parsing of empty cells is important in LaTeX, especially when dealing
-    -- with multirow/multicolumn. See #6603.
-    parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
-  where
-    alignment = do
-      c <- untoken <$> singleChar
-      return $ case c of
-        "l" -> AlignLeft
-        "r" -> AlignRight
-        "c" -> AlignCenter
-        "*" -> AlignDefault
-        _   -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
-                [Para ils] -> plain (fromList ils)
-                _          -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
-  -- Full prototype for \multirow macro is:
-  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-  -- However, everything except `nrows` and `text` make
-  -- sense in the context of the Pandoc AST
-  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
-  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
-  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
-  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
-  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
-  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
-  let singleCell = do
-        content <- plainify <$> blocks
-        return $ cell alignment (RowSpan 1) (ColSpan span') content
-
-  -- Two possible contents: either a \multirow cell, or content.
-  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-  -- Note that a \multirow cell can be nested in a \multicolumn,
-  -- but not the other way around. See #6603
-  let nestedCell = do
-        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
-        return $ cell
-                  alignment
-                  (RowSpan rs)
-                  (ColSpan span')
-                  (fromList bs)
-
-  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
-  where
-    rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
-  = TableBody attr rhc th' tb'
-  where
-    th' = fixTableRows th
-    tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
-  where
-    fixTableRows' oldHang (Row attr cells : rs)
-      = let (newHang, cells') = fixTableRow oldHang cells
-            rs'               = fixTableRows' newHang rs
-        in Row attr cells' : rs'
-    fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
-  -- If there's overhang, drop cells until their total width meets the
-  -- width of the occupied grid spaces (or we run out)
-  | (n, prefHang, restHang) <- splitHang oldHang
-  , n > 0
-  = let cells' = dropToWidth getCellW n cells
-        (restHang', cells'') = fixTableRow restHang cells'
-    in (prefHang restHang', cells'')
-  -- Otherwise record the overhang of a pending cell and fix the rest
-  -- of the row
-  | c@(Cell _ _ h w _):cells' <- cells
-  = let h' = max 1 h
-        w' = max 1 w
-        oldHang' = dropToWidth getHangW w' oldHang
-        (newHang, cells'') = fixTableRow oldHang' cells'
-    in (toHang w' h' <> newHang, c : cells'')
-  | otherwise
-  = (oldHang, [])
-  where
-    getCellW (Cell _ _ _ w _) = w
-    getHangW = maybe 1 fst
-    getCS (ColSpan n) = n
-
-    toHang c r
-      | r > 1     = [Just (c, r)]
-      | otherwise = replicate (getCS c) Nothing
-
-    -- Take the prefix of the overhang list representing filled grid
-    -- spaces. Also return the remainder and the length of this prefix.
-    splitHang = splitHang' 0 id
-
-    splitHang' !n l (Just (c, r):xs)
-      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
-    splitHang' n l xs = (n, l, xs)
-
-    -- Drop list items until the total width of the dropped items
-    -- exceeds the passed width.
-    dropToWidth _     n l | n < 1 = l
-    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
-    dropToWidth _     _ []        = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
-  when hasWidthParameter $ () <$ (spaces >> tok)
-  skipopts
-  colspecs <- parseAligns
-  let (aligns, widths, prefsufs) = unzip3 colspecs
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  skipMany hline
-  spaces
-  header' <- option [] . try . fmap (:[]) $
-             parseTableRow envname prefsufs <* lbreak <* many1 hline
-  spaces
-  rows <- sepEndBy (parseTableRow envname prefsufs)
-                    (lbreak <* optional (skipMany hline))
-  spaces
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  lookAhead $ controlSeq "end" -- make sure we're at end
-  let th  = fixTableHead $ TableHead nullAttr header'
-  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
-  let tf  = TableFoot nullAttr []
-  return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
-  where go (Table attr c spec th tb tf) = do
-          st <- getState
-          let mblabel = sLastLabel st
-          capt <- case (sCaption st, mblabel) of
-                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
-                   (Just ils, Just lab) -> do
-                     num <- getNextNumber sLastTableNum
-                     setState
-                       st{ sLastTableNum = num
-                         , sLabels = M.insert lab
-                                    [Str (renderDottedNum num)]
-                                    (sLabels st) }
-                     return $ caption Nothing (plain ils) -- add number??
-                   (Nothing, _)  -> return c
-          let attr' = case (attr, mblabel) of
-                        ((_,classes,kvs), Just ident) ->
-                           (ident,classes,kvs)
-                        _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
-        go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b       = Div attr [b]
-
 block :: PandocMonad m => LP m Blocks
 block = do
-  res <- (mempty <$ spaces1)
-    <|> environment
-    <|> macroDef (rawBlock "latex")
-    <|> blockCommand
-    <|> paragraph
-    <|> grouped block
+  Tok _ toktype _ <- lookAhead anyTok
+  res <- (case toktype of
+            Newline           -> mempty <$ spaces1
+            Spaces            -> mempty <$ spaces1
+            Comment           -> mempty <$ spaces1
+            Word              -> paragraph
+            CtrlSeq "begin"   -> environment
+            CtrlSeq _         -> macroDef (rawBlock "latex")
+                               <|> blockCommand
+            _                 -> mzero)
+          <|> paragraph
+          <|> grouped block
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  polylang <- untokenize <$> braced
-  case M.lookup polylang polyglossiaLangToBCP47 of
-       Nothing -> return mempty -- TODO mzero? warning?
-       Just langFunc -> do
-         let l = langFunc o
-         setTranslations l
-         updateState $ setMeta "lang" $ str (renderLang l)
-         return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..af97125c6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+  ( citationCommands
+  , cites
+  )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+  let citation = citationWith inline
+      tok = spaces *> grouped inline
+   in M.fromList
+  [ ("cite", citation "cite" NormalCitation False)
+  , ("Cite", citation "Cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("smartcite", citation "smartcite" NormalCitation False)
+  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Smartcite", citation "Smartcite" NormalCitation False)
+  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation inline NormalCitation)
+  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+                        complexNatbibCitation inline AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
+  ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _      = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
+  keys <- try $ bgroup *> manyTill citationLabel egroup
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+  opt :: PandocMonad m => LP m Inlines
+  opt = do
+    toks <- try (sp *> bracketedToks <* sp)
+    -- now parse the toks as inlines
+    st <- getState
+    parsed <- lift $
+      runParserT (mconcat <$> many inline) st "bracketed option" toks
+    case parsed of
+      Right result -> return result
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel  = do
+  sp
+  untokenize <$>
+    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+          <* sp
+          <* optional (symbol ',')
+          <* sp)
+  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+  let paropt = parenWrapped inline
+  cits <- if multi
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 $ simpleCiteArgs inline
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ (addMprenote pre k : init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
+             else count 1 $ simpleCiteArgs inline
+  let cs = concat cits
+  return $ case mode of
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+  (c,raw) <- withRaw $ cites inline mode multi
+  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+  let isCite Cite{} = True
+      isCite _      = False
+      (pref, rest) = break isCite (toList ils)
+  in case rest of
+          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+          _                -> []
+
+complexNatbibCitation :: PandocMonad m
+                      => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+  (cs, raw) <-
+    withRaw $ concat <$> do
+      bgroup
+      items <- mconcat <$>
+                many1 (notFollowedBy (symbol ';') >> inline)
+                  `sepBy1` symbol ';'
+      egroup
+      return $ map handleCitationPart items
+  case cs of
+       []       -> mzero
+       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+  note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..7b8bca4af
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,397 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ViewPatterns          #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+  ( acronymCommands
+  , verbCommands
+  , charCommands
+  , accentCommands
+  , nameCommands
+  , biblatexInlineCommands
+  , refCommands
+  , rawInlineOr
+  , listingsLanguage
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+                            manyTill, getInput, setInput, incSourceColumn,
+                            option, many1, try)
+import Data.Char (isDigit)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList, fromMaybe)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+  if parseRaw
+     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+     else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+  v <- braced
+  let refstr = untokenize v
+  updateState $ \st ->
+    st{ sLastLabel = Just refstr }
+  return $ spanWith (refstr,[],[("label", refstr)])
+    $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+  v <- braced
+  let refstr = untokenize v
+  return $ linkWith ("",[],[ ("reference-type", cls)
+                           , ("reference", refstr)])
+                    ("#" <> refstr)
+                    ""
+                    (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  withVerbatimMode $
+    code . untokenize <$>
+      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+  t@(Tok pos toktype txt) <- anyTok
+  case T.findIndex (== stopchar) txt of
+       Nothing -> return t
+       Just i  -> do
+         let (t1, t2) = T.splitAt i txt
+         inp <- getInput
+         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+         return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+  case lookup "language" opts of
+    Nothing  -> Nothing
+    Just l   -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+  options <- option [] keyvals
+  let classes = maybeToList $ listingsLanguage options
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- untokenize <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  let stopchar = if marker == '{' then '}' else marker
+  withVerbatimMode $
+    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+      manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x    = x
+
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+  str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+  str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+  where
+    inBraces = do
+      symbol '{'
+      spaces
+      res <- parser
+      spaces
+      symbol '}'
+      return res
+    parser = do
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
+      let (digits, rest) = T.span isDigit s
+      unless (T.null rest) $
+        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+      safeRead digits
+
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space] -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       []      -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       _       -> return ils
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+  [ ("verb", doverb)
+  , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
+  , ("Verb", doverb)
+  ]
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+  in  M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+  [ ("ldots", lit "…")
+  , ("vdots", lit "\8942")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("sep", lit ",")
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  , ("qed", lit "\a0\x25FB")
+  , ("lq", return (str "‘"))
+  , ("rq", return (str "’"))
+  , ("textquoteleft", return (str "‘"))
+  , ("textquoteright", return (str "’"))
+  , ("textquotedblleft", return (str "“"))
+  , ("textquotedblright", return (str "”"))
+  , ("/", pure mempty) -- italic correction
+  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+                            guard $ not inTableCell
+                            optional rawopt
+                            spaces))
+  , (",", lit "\8198")
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("slash", lit "/")
+  -- fontawesome
+  , ("faCheck", lit "\10003")
+  , ("faClose", lit "\10007")
+  -- hyphenat
+  , ("bshyp", lit "\\\173")
+  , ("fshyp", lit "/\173")
+  , ("dothyp", lit ".\173")
+  , ("colonhyp", lit ":\173")
+  , ("hyp", lit "-")
+  ]
+
+biblatexInlineCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+  -- biblatex misc
+  [ ("RN", romanNumeralUpper)
+  , ("Rn", romanNumeralLower)
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
+  ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+  [ ("figurename", doTerm Translations.Figure)
+  , ("prefacename", doTerm Translations.Preface)
+  , ("refname", doTerm Translations.References)
+  , ("bibname", doTerm Translations.Bibliography)
+  , ("chaptername", doTerm Translations.Chapter)
+  , ("partname", doTerm Translations.Part)
+  , ("contentsname", doTerm Translations.Contents)
+  , ("listfigurename", doTerm Translations.ListOfFigures)
+  , ("listtablename", doTerm Translations.ListOfTables)
+  , ("indexname", doTerm Translations.Index)
+  , ("abstractname", doTerm Translations.Abstract)
+  , ("tablename", doTerm Translations.Table)
+  , ("enclname", doTerm Translations.Encl)
+  , ("ccname", doTerm Translations.Cc)
+  , ("headtoname", doTerm Translations.To)
+  , ("pagename", doTerm Translations.Page)
+  , ("seename", doTerm Translations.See)
+  , ("seealsoname", doTerm Translations.SeeAlso)
+  , ("proofname", doTerm Translations.Proof)
+  , ("glossaryname", doTerm Translations.Glossary)
+  , ("lstlistingname", doTerm Translations.Listing)
+  ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+  [ ("label", rawInlineOr "label" dolabel)
+  , ("ref", rawInlineOr "ref" $ doref "ref")
+  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
+  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
+  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+  -- glossaries package
+  [ ("gls", doAcronym "short")
+  , ("Gls", doAcronym "short")
+  , ("glsdesc", doAcronym "long")
+  , ("Glsdesc", doAcronym "long")
+  , ("GLSdesc", doAcronym "long")
+  , ("acrlong", doAcronym "long")
+  , ("Acrlong", doAcronym "long")
+  , ("acrfull", doAcronym "full")
+  , ("Acrfull", doAcronym "full")
+  , ("acrshort", doAcronym "abbrv")
+  , ("Acrshort", doAcronym "abbrv")
+  , ("glspl", doAcronymPlural "short")
+  , ("Glspl", doAcronymPlural "short")
+  , ("glsdescplural", doAcronymPlural "long")
+  , ("Glsdescplural", doAcronymPlural "long")
+  , ("GLSdescplural", doAcronymPlural "long")
+  -- acronyms package
+  , ("ac", doAcronym "short")
+  , ("acf", doAcronym "full")
+  , ("acs", doAcronym "abbrv")
+  , ("acl", doAcronym "long")
+  , ("acp", doAcronymPlural "short")
+  , ("acfp", doAcronymPlural "full")
+  , ("acsp", doAcronymPlural "abbrv")
+  , ("aclp", doAcronymPlural "long")
+  , ("Ac", doAcronym "short")
+  , ("Acf", doAcronym "full")
+  , ("Acs", doAcronym "abbrv")
+  , ("Acl", doAcronym "long")
+  , ("Acp", doAcronymPlural "short")
+  , ("Acfp", doAcronymPlural "full")
+  , ("Acsp", doAcronymPlural "abbrv")
+  , ("Aclp", doAcronymPlural "long")
+  ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+  acro <- braced
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "singular+" <> form)])
+    $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+  acro <- braced
+  let plural = str "s"
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "plural+" <> form)]) $
+   mconcat [str $ untokenize acro, plural]]
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Lang
-   Copyright   : Copyright (C) 2018-2020 John MacFarlane
+   Copyright   : Copyright (C) 2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to
 BCP47 'Lang'.
 -}
 module Text.Pandoc.Readers.LaTeX.Lang
-  ( polyglossiaLangToBCP47
+  ( setDefaultLanguage
+  , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , enquoteCommands
+  , inlineLanguageCommands
   )
 where
 import qualified Data.Map as M
+import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.Shared (extractSpaces)
+import Text.Collate.Lang (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+                            withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+                            singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+        => LP m Inlines
+        -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+  skipopts
+  let lang = mblang >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
+  quoteContext <- sQuoteContext <$> getState
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+                => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+  [ ("enquote*", enquote tok True Nothing)
+  , ("enquote", enquote tok False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+  -- hypehnquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+  ]
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+  babelLang <- untokenize <$> braced
+  case babelLangToBCP47 babelLang of
+       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
+       _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+  M.fromList $
+    ("foreignlanguage", foreignlanguage tok) :
+    (mk <$> M.toList polyglossiaLangToBCP47)
+  where
+    mk (polyglossia, bcp47Func) =
+      ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+               => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  let lang = renderLang $ bcp47Func o
+  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  polylang <- untokenize <$> braced
+  case M.lookup polylang polyglossiaLangToBCP47 of
+       Nothing -> return mempty -- TODO mzero? warning?
+       Just langFunc -> do
+         let l = langFunc o
+         setTranslations l
+         updateState $ setMeta "lang" $ str (renderLang l)
+         return mempty
 
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing (Just "")   [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing (Just "")   [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..5495a8e74
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,184 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+  ( macroDef
+  )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+    (_, s) <- withRaw (commandDef <|> environmentDef)
+    (constructor (untokenize s) <$
+      guardDisabled Ext_latex_macros)
+     <|> return mempty
+  where commandDef = do
+          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+          guardDisabled Ext_latex_macros <|>
+           mapM_ (\(name, macro') ->
+                   updateState (\s -> s{ sMacros = M.insert name macro'
+                                          (sMacros s) })) nameMacroPairs
+        environmentDef = do
+          mbenv <- newenvironment
+          case mbenv of
+            Nothing -> return ()
+            Just (name, macro1, macro2) ->
+              guardDisabled Ext_latex_macros <|>
+                do updateState $ \s -> s{ sMacros =
+                    M.insert name macro1 (sMacros s) }
+                   updateState $ \s -> s{ sMacros =
+                    M.insert ("end" <> name) macro2 (sMacros s) }
+        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+        -- is equivalent to
+        -- @\newcommand{\envname}[n-args][default]{begin}@
+        -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
+letmacro = do
+  controlSeq "let"
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    optional $ symbol '='
+    spaces
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
+    contents <- bracedOrToken
+    return (name, contents)
+  contents' <- doMacros' 0 contents
+  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
+  -- we use withVerbatimMode, because macros are to be expanded
+  -- at point of use, not point of definition
+  controlSeq "def"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    argspecs <- many (argspecArg <|> argspecPattern)
+    contents <- bracedOrToken
+    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+  controlSeq "newif"
+  withVerbatimMode $ do
+    Tok pos (CtrlSeq name) _ <- anyControlSeq
+    -- \def\iffoo\iffalse
+    -- \def\footrue{\def\iffoo\iftrue}
+    -- \def\foofalse{\def\iffoo\iffalse}
+    let base = T.drop 2 name
+    return [ (name, Macro ExpandWhenUsed [] Nothing
+                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+           , (base <> "true",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+                   ])
+           , (base <> "false",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+                   ])
+           ]
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+  Tok _ (Arg i) _ <- satisfyTok isArgTok
+  return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                              (toktype' == Symbol || toktype' == Word) &&
+                              (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
+newcommand = do
+  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+                             controlSeq "renewcommand" <|>
+                             controlSeq "providecommand" <|>
+                             controlSeq "DeclareMathOperator" <|>
+                             controlSeq "DeclareRobustCommand"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) txt <- do
+      optional (symbol '*')
+      anyControlSeq <|>
+        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    let argspecs = map ArgNum [1..numargs]
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    spaces
+    contents' <- bracedOrToken
+    let contents =
+         case mtype of
+              "DeclareMathOperator" ->
+                 Tok pos (CtrlSeq "mathop") "\\mathop"
+                 : Tok pos Symbol "{"
+                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+                 : Tok pos Symbol "{"
+                 : (contents' ++
+                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+              _                     -> contents'
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+        Just macro
+          | mtype == "newcommand" -> do
+              report $ MacroAlreadyDefined txt pos
+              return [(name, macro)]
+          | mtype == "providecommand" -> return [(name, macro)]
+        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+                             controlSeq "renewenvironment" <|>
+                             controlSeq "provideenvironment"
+  withVerbatimMode $ do
+    optional $ symbol '*'
+    spaces
+    name <- untokenize <$> braced
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    let argspecs = map (\i -> ArgNum i) [1..numargs]
+    startcontents <- spaces >> bracedOrToken
+    endcontents <- spaces >> bracedOrToken
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+         Just _
+           | mtype == "newenvironment" -> do
+               report $ MacroAlreadyDefined name pos
+               return Nothing
+           | mtype == "provideenvironment" ->
+               return Nothing
+         _ -> return $ Just (name,
+                      Macro ExpandWhenUsed argspecs optarg startcontents,
+                      Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+  ds <- untokenize <$> bracketedToks
+  case safeRead ds of
+       Just i -> return i
+       _      -> return 0
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+  ( dollarsMath
+  , inlineEnvironments
+  , inlineEnvironment
+  , mathInline
+  , mathDisplay
+  , theoremstyle
+  , theoremEnvironment
+  , newtheorem
+  , proof
+  )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+  symbol '$'
+  display <- option False (True <$ symbol '$')
+  (do contents <- try $ untokenize <$> pDollarsMath 0
+      if display
+         then mathDisplay contents <$ symbol '$'
+         else return $ mathInline contents)
+   <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+  tk@(Tok _ toktype t) <- anyTok
+  case toktype of
+       Symbol | t == "$"
+              , n == 0 -> return []
+              | t == "\\" -> do
+                  tk' <- anyTok
+                  (tk :) . (tk' :) <$> pDollarsMath n
+              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+              | t == "}" ->
+                if n > 0
+                then (tk :) <$> pDollarsMath (n-1)
+                else mzero
+       _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+   where inner x = case innerEnv of
+                        Nothing -> x
+                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
+                                   "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+  skipopts
+  optional blankline
+  res <- manyTill anyTok (end_ name)
+  return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+    ("displaymath", mathEnvWith id Nothing "displaymath")
+  , ("math", math <$> mathEnv "math")
+  , ("equation", mathEnvWith id Nothing "equation")
+  , ("equation*", mathEnvWith id Nothing "equation*")
+  , ("gather", mathEnvWith id (Just "gathered") "gather")
+  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+  , ("multline", mathEnvWith id (Just "gathered") "multline")
+  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+  , ("align", mathEnvWith id (Just "aligned") "align")
+  , ("align*", mathEnvWith id (Just "aligned") "align*")
+  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+  ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+  stylename <- untokenize <$> braced
+  let mbstyle = case stylename of
+                  "plain"      -> Just PlainStyle
+                  "definition" -> Just DefinitionStyle
+                  "remark"     -> Just RemarkStyle
+                  _            -> Nothing
+  case mbstyle of
+    Nothing  -> return ()
+    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+  return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+  number <- option True (False <$ symbol '*' <* sp)
+  name <- untokenize <$> braced
+  sp
+  series <- option Nothing $ Just . untokenize <$> bracketedToks
+  sp
+  showName <- tokWith inline
+  sp
+  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+  sty <- sLastTheoremStyle <$> getState
+  let spec = TheoremSpec { theoremName = showName
+                         , theoremStyle = sty
+                         , theoremSeries = series
+                         , theoremSyncTo = syncTo
+                         , theoremNumber = number
+                         , theoremLastNum = DottedNum [0] }
+  tmap <- sTheoremMap <$> getState
+  updateState $ \s -> s{ sTheoremMap =
+                            M.insert name spec tmap }
+  return mempty
+
+theoremEnvironment :: PandocMonad m
+                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+  tmap <- sTheoremMap <$> getState
+  case M.lookup name tmap of
+    Nothing -> mzero
+    Just tspec -> do
+       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+       mblabel <- option Nothing $ Just . untokenize <$>
+                   try (spaces >> controlSeq "label" >> spaces >> braced)
+       bs <- env name blocks
+       number <-
+         if theoremNumber tspec
+            then do
+               let name' = fromMaybe name $ theoremSeries tspec
+               num <- getNextNumber
+                   (maybe (DottedNum [0]) theoremLastNum .
+                    M.lookup name' . sTheoremMap)
+               updateState $ \s ->
+                 s{ sTheoremMap =
+                       M.adjust
+                       (\spec -> spec{ theoremLastNum = num })
+                       name'
+                       (sTheoremMap s)
+                  }
+
+               case mblabel of
+                 Just ident ->
+                   updateState $ \s ->
+                     s{ sLabels = M.insert ident
+                         (B.toList $
+                           theoremName tspec <> "\160" <>
+                           str (renderDottedNum num)) (sLabels s) }
+                 Nothing -> return ()
+               return $ space <> B.text (renderDottedNum num)
+            else return mempty
+       let titleEmph = case theoremStyle tspec of
+                         PlainStyle      -> B.strong
+                         DefinitionStyle -> B.strong
+                         RemarkStyle     -> B.emph
+       let title = titleEmph (theoremName tspec <> number)
+                      <> optTitle <> "." <> space
+       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+              $ case theoremStyle tspec of
+                  PlainStyle -> walk italicize bs
+                  _          -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+  title <- option (B.text "Proof") opt
+  bs <- env "proof" blocks
+  return $
+    B.divWith ("", ["proof"], []) $
+      addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+  case B.toList bs of
+    (Para xs : rest)
+      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+    _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+  case Seq.viewr (B.unMany bs) of
+    s Seq.:> Para ils
+      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+    _ -> bs <> B.para qedSign
+ where
+  qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,11 +1,12 @@
 {-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
+  , getInputTokens
   , untokenize
   , untoken
   , totoks
   , toksToString
   , satisfyTok
+  , parseFromToks
+  , disablingWithRaw
   , doMacros
   , doMacros'
   , setpos
@@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , comment
   , anyTok
   , singleChar
+  , tokWith
   , specialChars
   , endline
   , blankline
@@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawopt
   , overlaySpecification
   , getNextNumber
+  , label
+  , setCaption
+  , resetCaption
+  , env
+  , addMeta
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -87,13 +98,15 @@ import Control.Monad.Trans (lift)
 import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
 import Data.Default
 import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
--- import Debug.Trace
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sLabels        :: M.Map Text [Inline]
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
-                            , sExpanded      :: Bool
+                            , sFileContents  :: M.Map Text Text
+                            , sEnableWithRaw :: Bool
+                            , sRawTokens     :: IntMap.IntMap [Tok]
                             }
      deriving Show
 
@@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sLabels        = M.empty
                               , sHasChapters   = False
                               , sToggles       = M.empty
-                              , sExpanded      = False
+                              , sFileContents  = M.empty
+                              , sEnableWithRaw = True
+                              , sRawTokens     = IntMap.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -232,21 +248,25 @@ withVerbatimMode parser = do
        updateState $ \st -> st{ sVerbatimMode = False }
        return result
 
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
   let lstate' = lstate { sMacros = extractMacros pstate }
+  let setStartPos = case toks of
+                      Tok pos _ _ : _ -> setPosition pos
+                      _ -> return ()
+  let preparser = setStartPos >> parser
   let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+                            lstate "chunk" toks
   case res' of
        Left _    -> mzero
-       Right toks' -> do
+       Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        doMacros
                                         ts <- many (satisfyTok (const True))
                                         setInput ts
                                       rawparser)
@@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                let skipTilPos stopPos = do
+                      anyChar
+                      pos <- getPosition
+                      if pos >= stopPos
+                         then return ()
+                         else skipTilPos stopPos
+                skipTilPos endpos
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
            Left e   -> Prelude.fail (show e)
            Right s' -> return s'
 
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+>   let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+  pos <- getPosition
+  ss <- getInput
+  return $
+    case ss of
+      Sources [] -> []
+      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
@@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty
 toksToString :: [Tok] -> String
 toksToString = T.unpack . untokenize
 
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+  oldInput <- getInput
+  setInput toks
+  oldpos <- getPosition
+  case toks of
+     Tok pos _ _ : _ -> setPosition pos
+     _ -> return ()
+  result <- disablingWithRaw parser
+  setInput oldInput
+  setPosition oldpos
+  return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+  oldEnableWithRaw <- sEnableWithRaw <$> getState
+  updateState $ \st -> st{ sEnableWithRaw = False }
+  result <- parser
+  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+  return result
+
 satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False }
-    return res
+    updateState $ \st ->
+      if sEnableWithRaw st
+         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+         else st
+    return $! res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
         updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
+        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
-  expanded <- sExpanded <$> getState
-  verbatimMode <- sVerbatimMode <$> getState
-  unless (expanded || verbatimMode) $ do
-      getInput >>= doMacros' 1 >>= setInput
-      updateState $ \st -> st{ sExpanded = True }
+  st <- getState
+  unless (sVerbatimMode st) $
+    getInput >>= doMacros' 1 >>= setInput
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
 doMacros' n inp =
   case inp of
      Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
       Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos name ts
+        -> handleMacros n spos name ts <|> return inp
      Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
       Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos ("end" <> name) ts
+        -> handleMacros n spos ("end" <> name) ts <|> return inp
      Tok _ (CtrlSeq "expandafter") _ : t : ts
         -> combineTok t <$> doMacros' n ts
      Tok spos (CtrlSeq name) _ : ts
-        -> handleMacros n spos name ts
+        -> handleMacros n spos name ts <|> return inp
      _ -> return inp
-   <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -482,7 +554,7 @@ doMacros' n inp =
         $ throwError $ PandocMacroLoop name
       macros <- sMacros <$> getState
       case M.lookup name macros of
-           Nothing -> mzero
+           Nothing -> trySpecialMacro name ts
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
@@ -510,6 +582,41 @@ doMacros' n inp =
                    ExpandWhenUsed    -> doMacros' (n' + 1) result
                    ExpandWhenDefined -> return result
 
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+  ts' <- doMacros' 1 ts
+  case ts' of
+    Tok pos Word t : _
+      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+    _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
+trySpecialMacro _ _ = mzero
+
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+  case res' of
+    Left _ -> Prelude.fail "Could not parse conditional"
+    Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+                    *> anyTok)
+  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+                 <|> ([] <$ controlSeq "fi")
+  rest <- getInput
+  return $ (if b then ifToks else elseToks) ++ rest
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+  case T.uncons t of
+       Just (c, _) | isAlphaNum c -> True
+       _           -> False
+
 setpos :: SourcePos -> Tok -> Tok
 setpos spos (Tok _ tt txt) = Tok spos tt txt
 
@@ -592,18 +699,22 @@ isCommentTok _                 = False
 anyTok :: PandocMonad m => LP m Tok
 anyTok = satisfyTok (const True)
 
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+  satisfyTok $ \case
+     Tok _ Word  t   -> T.length t == 1
+     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t)
+     _               -> False
+
 singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+  singleCharFromWord = do
+    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+    let (t1, t2) = (T.take 1 t, T.drop 1 t)
+    inp <- getInput
+    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+    anyTok
 
 specialChars :: Set.Set Char
 specialChars = Set.fromList "#$%&~_^\\{}"
@@ -646,28 +757,25 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
 
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
-  handleEgroup <|> handleBgroup <|> handleOther
-  where handleEgroup = do
-          t <- symbol '}'
-          if n == 1
-             then return []
-             else (t:) <$> braced' getTok (n - 1)
-        handleBgroup = do
-          t <- symbol '{'
-          (t:) <$> braced' getTok (n + 1)
-        handleOther = do
-          t <- getTok
-          (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+  go n = do
+    t <- getTok
+    case t of
+      Tok _ Symbol "}"
+        | n > 1     -> (t:) <$> go (n - 1)
+        | otherwise -> return []
+      Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+      _ -> (t:) <$> go n
 
 braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
 
 -- URLs require special handling, because they can contain %
 -- characters.  So we retonenize comments as we go...
 bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
 
 -- For handling URLs, which allow literal % characters...
 retokenizeComment :: PandocMonad m => LP m ()
@@ -723,16 +831,29 @@ ignore raw = do
 
 withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
-  inp <- getInput
+  rawTokensMap <- sRawTokens <$> getState
+  let key = case IntMap.lookupMax rawTokensMap of
+               Nothing     -> 0
+               Just (n,_)  -> n + 1
+  -- insert empty list at key
+  updateState $ \st -> st{ sRawTokens =
+                             IntMap.insert key [] $ sRawTokens st }
   result <- parser
-  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
-  let raw = takeWhile (\(Tok pos _ _) -> maybe True
-                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+  raw <- case mbRevToks of
+           Just revtoks -> do
+             updateState $ \st -> st{ sRawTokens =
+                                        IntMap.delete key $ sRawTokens st}
+             return $ reverse revtoks
+           Nothing      ->
+             throwError $ PandocShouldNeverHappenError $
+                "sRawTokens has nothing at key " <> T.pack (show key)
   return (result, raw)
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
@@ -792,7 +913,7 @@ getRawCommand name txt = do
   (_, rawargs) <- withRaw $
       case name of
            "write" -> do
-             void $ satisfyTok isWordTok -- digits
+             void $ many $ satisfyTok isDigitTok -- digits
              void braced
            "titleformat" -> do
              void braced
@@ -807,6 +928,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _              = False
+
 skipopts :: PandocMonad m => LP m ()
 skipopts = skipMany (void overlaySpecification <|> void rawopt)
 
@@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do
                Just n  -> [n, 1]
                Nothing -> [1]
 
+label :: PandocMonad m => LP m ()
+label = do
+  controlSeq "label"
+  t <- braced
+  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+  skipopts
+  ils <- tokWith inline
+  optional $ try $ spaces *> label
+  updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption   = Nothing
+                                      , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+                                 grouped inlineParser
+                            <|> (lookAhead anyControlSeq >> inlineParser)
+                            <|> singleChar'
+  where singleChar' = do
+          Tok _ _ t <- singleChar
+          return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+   st{ sMeta = addMetaField field val $ sMeta st }
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,7 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
-  ( dosi
-  , doSI
-  , doSIrange
-  , doSInum
-  , doSInumlist
-  , doSIang
-  )
+  ( siunitxCommands )
 where
 import Text.Pandoc.Builder
 import Text.Pandoc.Readers.LaTeX.Parsing
@@ -15,14 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
+
+siunitxCommands :: PandocMonad m
+                 => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+  [ ("si", dosi tok)
+  , ("SI", doSI tok)
+  , ("SIrange", doSIrange True tok)
+  , ("numrange", doSIrange False tok)
+  , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
+  , ("num", doSInum)
+  , ("ang", doSIang)
+  ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -57,23 +70,50 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
   parseComma <|>
   parsePlusMinus <|>
+  parsePM <|>
   parseI <|>
   parseExp <|>
   parseX <|>
   parseSpace
  where
-  parseDecimalNum = do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
-    basenum <- (pref <>) . T.pack
-                <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+  parseDecimalNum, parsePlusMinus, parsePM,
+    parseComma, parseI, parseX,
+    parseExp, parseSpace :: Parser Text () Inlines
+  parseDecimalNum = try $ do
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
+    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+    let basenum = pref <> T.pack
+                    (case basenum' of
+                      '.':_ -> '0':basenum'
+                      _ -> basenum')
     uncertainty <- option mempty $ T.pack <$> parseParens
     if T.null uncertainty
        then return $ str basenum
@@ -91,6 +131,7 @@ parseNumPart =
                                                | otherwise -> "." <> t
   parseComma = str "." <$ char ','
   parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
   parseParens =
     char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
   parseI = str "i" <$ char 'i'
@@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines
 doSIang = do
   skipopts
   ps <- T.splitOn ";" . untokenize <$> braced
+  let dropPlus t = case T.uncons t of
+                     Just ('+',t') -> t'
+                     _ -> t
   case ps ++ repeat "" of
     (d:m:s:_) -> return $
-      (if T.null d then mempty else str d <> str "\xb0") <>
-      (if T.null m then mempty else str m <> str "\x2032") <>
-      (if T.null s then mempty else str s <> str "\x2033")
+      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
     _ -> return mempty
 
 -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
@@ -136,40 +180,99 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
-  Tok _ (CtrlSeq name) _ <- anyControlSeq
-  case name of
-    "square" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "2"
-    "cubic" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "3"
-    "raisetothe" -> do
-       n <- tok
-       unit <- siUnit tok
-       return $ unit <> superscript n
-    _ ->
-       case M.lookup name siUnitMap of
-            Just il ->
-              option il $
-                choice
-                 [ (il <> superscript "2") <$ controlSeq "squared"
-                 , (il <> superscript "3") <$ controlSeq "cubed"
-                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
-                 ]
-            Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
-         return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
-         return (str t))
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
+ where
+  siUnitPart :: LP m Inlines
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
+  siPrefix :: LP m (Inlines -> Inlines)
+  siPrefix =
+       (do _ <- controlSeq "square"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubic"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "raisetothe"
+           skipopts
+           n <- walk hyphenToMinus <$> tok
+           return (<> superscript n))
+  siSuffix :: LP m (Inlines -> Inlines)
+  siSuffix =
+       (do _ <- controlSeq "squared"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubed"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "tothe"
+           skipopts
+           n <- walk hyphenToMinus <$> tok
+           return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
+  siBase :: LP m Inlines
+  siBase =
+    ((try
+       (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
+    <|> (do Tok _ Word t <- satisfyTok isWordTok
+            return $ str t)
+     )
+
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
 
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
@@ -269,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -277,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -329,25 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("per", str "/")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..f56728fe1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,379 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+  ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+                  => LP m Blocks
+                  -> LP m Inlines
+                  -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+  M.fromList
+  [ ("longtable",  env "longtable" $
+          resetCaption *>
+            simpTable blocks inline "longtable" False >>= addTableCaption)
+  , ("table",  env "table" $
+          skipopts *> resetCaption *> blocks >>= addTableCaption)
+  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False)
+  ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+  spaces
+  controlSeq "hline" <|>
+    (controlSeq "cline" <* braced) <|>
+    -- booktabs rules:
+    controlSeq "toprule" <|>
+    controlSeq "bottomrule" <|>
+    controlSeq "midrule" <|>
+    controlSeq "endhead" <|>
+    controlSeq "endfirsthead"
+  spaces
+  optional rawopt
+  return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+         <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+  inp <- getInput
+  case inp of
+       (Tok spos Word t : rest) ->
+         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+       _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+  let maybeBar = skipMany
+        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+  let cAlign = AlignCenter <$ symbol 'c'
+  let lAlign = AlignLeft <$ symbol 'l'
+  let rAlign = AlignRight <$ symbol 'r'
+  let parAlign = AlignLeft <$ symbol 'p'
+  -- aligns from tabularx
+  let xAlign = AlignLeft <$ symbol 'X'
+  let mAlign = AlignLeft <$ symbol 'm'
+  let bAlign = AlignLeft <$ symbol 'b'
+  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
+                                 <|> xAlign <|> mAlign <|> bAlign )
+  let alignPrefix = symbol '>' >> braced
+  let alignSuffix = symbol '<' >> braced
+  let colWidth = try $ do
+        symbol '{'
+        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+        spaces
+        symbol '}'
+        return $ safeRead ds
+  let alignSpec = do
+        pref <- option [] alignPrefix
+        spaces
+        al <- alignChar
+        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+                                                 pos <- getPosition
+                                                 report $ SkippedContent s pos
+                                                 return Nothing)
+        spaces
+        suff <- option [] alignSuffix
+        return (al, width, (pref, suff))
+  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+        symbol '*'
+        spaces
+        ds <- trim . untokenize <$> braced
+        spaces
+        spec <- braced
+        case safeRead ds of
+             Just n  ->
+               getInput >>= setInput . (mconcat (replicate n spec) ++)
+             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+  bgroup
+  spaces
+  maybeBar
+  aligns' <- many $ try $ spaces >> optional starAlign >>
+                            (alignSpec <* maybeBar)
+  spaces
+  egroup
+  spaces
+  return $ map toSpec aligns'
+  where
+    toColWidth (Just w) | w > 0 = ColWidth w
+    toColWidth _                = ColWidthDefault
+    toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+              => LP m Blocks -- ^ block parser
+              -> LP m Inlines -- ^ inline parser
+              -> Text   -- ^ table environment name
+              -> [([Tok], [Tok])] -- ^ pref/suffixes
+              -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+  notFollowedBy (spaces *> end_ envname)
+  -- contexts that can contain & that is not colsep:
+  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
+      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
+      canContainAmp _       = False
+  -- add prefixes and suffixes in token stream:
+  let celltoks (pref, suff) = do
+        prefpos <- getPosition
+        contents <- mconcat <$>
+            many ( snd <$> withRaw
+                     ((lookAhead (controlSeq "parbox") >>
+                       void blocks) -- #5711
+                      <|>
+                      (lookAhead (satisfyTok canContainAmp) >> void inline)
+                      <|>
+                      (lookAhead (symbol '$') >> void inline))
+                  <|>
+                   (do notFollowedBy
+                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
+                       count 1 anyTok) )
+
+        suffpos <- getPosition
+        option [] (count 1 amp)
+        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+  rawcells <- mapM celltoks prefsufs
+  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+  spaces
+  return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+  spaces
+  updateState $ \st -> st{ sInTableCell = True }
+  cell' <-   multicolumnCell blocks
+         <|> multirowCell blocks
+         <|> parseSimpleCell
+         <|> parseEmptyCell
+  updateState $ \st -> st{ sInTableCell = False }
+  spaces
+  return cell'
+  where
+    -- The parsing of empty cells is important in LaTeX, especially when dealing
+    -- with multirow/multicolumn. See #6603.
+    parseEmptyCell = spaces $> emptyCell
+    parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+  where
+    alignment = do
+      c <- untoken <$> singleChar
+      return $ case c of
+        "l" -> AlignLeft
+        "r" -> AlignRight
+        "c" -> AlignCenter
+        "*" -> AlignDefault
+        _   -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+                [Para ils] -> plain (fromList ils)
+                _          -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+  -- Full prototype for \multirow macro is:
+  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+  -- However, everything except `nrows` and `text` make
+  -- sense in the context of the Pandoc AST
+  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
+  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
+  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
+  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+  let singleCell = do
+        content <- plainify <$> blocks
+        return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+  -- Two possible contents: either a \multirow cell, or content.
+  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+  -- Note that a \multirow cell can be nested in a \multicolumn,
+  -- but not the other way around. See #6603
+  let nestedCell = do
+        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+        return $ cell
+                  alignment
+                  (RowSpan rs)
+                  (ColSpan span')
+                  (fromList bs)
+
+  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+  where
+    rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+  = TableBody attr rhc th' tb'
+  where
+    th' = fixTableRows th
+    tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+  where
+    fixTableRows' oldHang (Row attr cells : rs)
+      = let (newHang, cells') = fixTableRow oldHang cells
+            rs'               = fixTableRows' newHang rs
+        in Row attr cells' : rs'
+    fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+  -- If there's overhang, drop cells until their total width meets the
+  -- width of the occupied grid spaces (or we run out)
+  | (n, prefHang, restHang) <- splitHang oldHang
+  , n > 0
+  = let cells' = dropToWidth getCellW n cells
+        (restHang', cells'') = fixTableRow restHang cells'
+    in (prefHang restHang', cells'')
+  -- Otherwise record the overhang of a pending cell and fix the rest
+  -- of the row
+  | c@(Cell _ _ h w _):cells' <- cells
+  = let h' = max 1 h
+        w' = max 1 w
+        oldHang' = dropToWidth getHangW w' oldHang
+        (newHang, cells'') = fixTableRow oldHang' cells'
+    in (toHang w' h' <> newHang, c : cells'')
+  | otherwise
+  = (oldHang, [])
+  where
+    getCellW (Cell _ _ _ w _) = w
+    getHangW = maybe 1 fst
+    getCS (ColSpan n) = n
+
+    toHang c r
+      | r > 1     = [Just (c, r)]
+      | otherwise = replicate (getCS c) Nothing
+
+    -- Take the prefix of the overhang list representing filled grid
+    -- spaces. Also return the remainder and the length of this prefix.
+    splitHang = splitHang' 0 id
+
+    splitHang' !n l (Just (c, r):xs)
+      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+    splitHang' n l xs = (n, l, xs)
+
+    -- Drop list items until the total width of the dropped items
+    -- exceeds the passed width.
+    dropToWidth _     n l | n < 1 = l
+    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
+    dropToWidth _     _ []        = []
+
+simpTable :: PandocMonad m
+          => LP m Blocks
+          -> LP m Inlines
+          -> Text
+          -> Bool
+          -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+  when hasWidthParameter $ () <$ tokWith inline
+  skipopts
+  colspecs <- parseAligns
+  let (aligns, widths, prefsufs) = unzip3 colspecs
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  skipMany hline
+  spaces
+  header' <- option [] . try . fmap (:[]) $
+             parseTableRow blocks inline envname prefsufs <*
+               lbreak <* many1 hline
+  spaces
+  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+                    (lbreak <* optional (skipMany hline))
+  spaces
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  let th  = fixTableHead $ TableHead nullAttr header'
+  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+  let tf  = TableFoot nullAttr []
+  return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+  where go (Table attr c spec th tb tf) = do
+          st <- getState
+          let mblabel = sLastLabel st
+          capt <- case (sCaption st, mblabel) of
+                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
+                   (Just ils, Just lab) -> do
+                     num <- getNextNumber sLastTableNum
+                     setState
+                       st{ sLastTableNum = num
+                         , sLabels = M.insert lab
+                                    [Str (renderDottedNum num)]
+                                    (sLabels st) }
+                     return $ caption Nothing (plain ils) -- add number??
+                   (Nothing, _)  -> return c
+          let attr' = case (attr, mblabel) of
+                        ((_,classes,kvs), Just ident) ->
+                           (ident,classes,kvs)
+                        _ -> attr
+          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+        go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b       = Div attr [b]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,7 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..2dc7ddf52 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,15 +21,17 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
+import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, takeDirectory)
+import qualified System.FilePath.Windows as Windows
+import qualified System.FilePath.Posix as Posix
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -45,20 +47,22 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
                                  isCommentTag, isInlineTag, isTextTag)
 import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
+
+type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -79,7 +83,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -95,14 +99,12 @@ yamlToRefs :: PandocMonad m
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
-        oldPos <- getPosition
         case mbfp of
           Nothing -> return ()
           Just fp -> setPosition $ initialPos fp
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
-        setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -145,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -247,51 +249,48 @@ titleBlock :: PandocMonad m => MarkdownParser m ()
 titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
 pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
+pandocTitleBlock = do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
-  title <- option mempty titleLine
-  author <- option (return []) authorsLine
-  date <- option mempty dateLine
-  optional blanklines
-  let meta' = do title' <- title
-                 author' <- author
-                 date' <- date
-                 return $
-                     (if null title' then id else B.setMeta "title" title')
-                   . (if null author' then id else B.setMeta "author" author')
-                   . (if null date' then id else B.setMeta "date" date')
-                   $ nullMeta
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
+  try $ do
+    title <- option mempty titleLine
+    author <- option (return []) authorsLine
+    date <- option mempty dateLine
+    optional blanklines
+    let meta' = do title' <- title
+                   author' <- author
+                   date' <- date
+                   return $
+                       (if null title'
+                           then id
+                           else B.setMeta "title" title')
+                     . (if null author'
+                           then id
+                           else B.setMeta "author" author')
+                     . (if null date'
+                           then id
+                           else B.setMeta "date" date')
+                     $ nullMeta
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-              $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+  newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks)
   -- Since `<>` is left-biased, existing values are not touched:
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
   return mempty
 
-stopLine :: PandocMonad m => MarkdownParser m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
+mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
-  firstPair <- kvPair False
-  restPairs <- many (kvPair True)
-  let kvPairs = firstPair : restPairs
-  blanklines
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
-                             return (Meta $ M.fromList kvPairs) }
+  try $ do
+    firstPair <- kvPair False
+    restPairs <- many (kvPair True)
+    let kvPairs = firstPair : restPairs
+    blanklines
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                               return (Meta $ M.fromList kvPairs) }
 
 kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
 kvPair allowEmpty = try $ do
@@ -300,7 +299,7 @@ kvPair allowEmpty = try $ do
           (try $ newline >> lookAhead (blankline <|> nonspaceChar))
   guard $ allowEmpty || not (T.null val)
   let key' = T.concat $ T.words $ T.toLower key
-  let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
+  let val' = MetaInlines $ B.toList $ B.text val
   return (key',val')
 
 parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
@@ -334,10 +333,14 @@ referenceKey = try $ do
                     skipMany spaceChar
                     notFollowedBy' referenceTitle
                     notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+                    notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
+                                     try (spnl <* keyValAttr)
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
-  src <- try betweenAngles <|> sourceURL
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  src <- (if rebase then rebasePath pos else id) <$>
+             (try betweenAngles <|> sourceURL)
   tit <- option "" referenceTitle
   attr   <- option nullAttr $ try $
               do guardEnabled Ext_link_attributes
@@ -346,7 +349,7 @@ referenceKey = try $ do
   addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
                           >> many (try $ spnl >> keyValAttr)
   blanklines
-  let attr'  = extractIdClass $ foldl (\x f -> f x) attr addKvs
+  let attr'  = extractIdClass $ foldl' (\x f -> f x) attr addKvs
       target = (escapeURI $ trimr src, tit)
   st <- getState
   let oldkeys = stateKeys st
@@ -442,7 +445,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks)
 block = do
   res <- choice [ mempty <$ blanklines
                , codeBlockFenced
-               , yamlMetaBlock
+               , yamlMetaBlock'
                -- note: bulletList needs to be before header because of
                -- the possibility of empty list items: -
                , bulletList
@@ -568,7 +571,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +591,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -602,7 +605,7 @@ attributes = try $ do
   spnl
   attrs <- many (attribute <* spnl)
   char '}'
-  return $ foldl (\x f -> f x) nullAttr attrs
+  return $ foldl' (\x f -> f x) nullAttr attrs
 
 attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
 attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
@@ -659,15 +662,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
 codeBlockFenced = try $ do
   indentchars <- nonindentSpaces
   let indentLevel = T.length indentchars
-  c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+  c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
      <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
   size <- blockDelimiter (== c) Nothing
   skipMany spaceChar
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+         ((guardEnabled Ext_fenced_code_attributes >> try attributes)
           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
   blankline
   contents <- T.intercalate "\n" <$>
@@ -732,7 +735,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1028,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1118,6 +1121,7 @@ rawTeXBlock = do
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
   indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
@@ -1131,7 +1135,9 @@ rawHtmlBlocks = do
                  gobbleAtMostSpaces indentlevel
                  notFollowedBy' closer
                  block
-  contents <- mconcat <$> many block'
+  contents <- if selfClosing
+                 then return mempty
+                 else mconcat <$> many block'
   result <-
     try
     (do gobbleAtMostSpaces indentlevel
@@ -1155,11 +1161,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
 --
 
 lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
+lineBlock = do
   guardEnabled Ext_line_blocks
-  lines' <- lineBlockLines >>=
-            mapM (parseFromString' (trimInlinesF <$> inlines))
-  return $ B.lineBlock <$> sequence lines'
+  try $ do
+    lines' <- lineBlockLines >>=
+              mapM (parseFromString' (trimInlinesF <$> inlines))
+    return $ B.lineBlock <$> sequence lines'
 
 --
 -- Tables
@@ -1169,7 +1176,7 @@ lineBlock = try $ do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1238,7 +1245,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1261,11 +1268,12 @@ multilineRow indices = do
 -- Parses a table caption:  inlines beginning with 'Table:'
 -- and followed by blank lines.
 tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
+tableCaption = do
   guardEnabled Ext_table_captions
-  skipNonindentSpaces
-  (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
-  trimInlinesF <$> inlines1 <* blanklines
+  try $ do
+    skipNonindentSpaces
+    (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+    trimInlinesF <$> inlines1 <* blanklines
 
 -- Parse a simple table with '---' header and one line per row.
 simpleTable :: PandocMonad m
@@ -1351,7 +1359,7 @@ pipeTable = try $ do
   lines' <- many pipeTableRow
   let lines'' = map (take (length aligns) <$>) lines'
   let maxlength = maximum $
-       map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
+       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
   numColumns <- getOption readerColumns
   let widths = if maxlength > numColumns
                   then map (\len ->
@@ -1388,7 +1396,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1404,10 +1412,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1434,15 +1446,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks)
 table = try $ do
   frontCaption <- option Nothing (Just <$> tableCaption)
   (aligns, widths, heads, lns) <-
-         try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable False) <|>
-         try (guardEnabled Ext_simple_tables >>
-                (simpleTable True <|> simpleTable False)) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable True) <|>
-         try (guardEnabled Ext_grid_tables >>
-                (gridTable False <|> gridTable True)) <?> "table"
+         (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
+         (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
+         (guardEnabled Ext_simple_tables >>
+                try (simpleTable True <|> simpleTable False)) <|>
+         (guardEnabled Ext_multiline_tables >>
+                try (multilineTable True)) <|>
+         (guardEnabled Ext_grid_tables >>
+                try (gridTable False <|> gridTable True)) <?> "table"
   optional blanklines
   caption <- case frontCaption of
                   Nothing -> option (return mempty) tableCaption
@@ -1476,35 +1487,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
 inlines1 = mconcat <$> many1 inline
 
 inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
-                , bareURL
-                , str
-                , endline
-                , code
-                , strongOrEmph
-                , note
-                , cite
-                , bracketedSpan
-                , link
-                , image
-                , math
-                , strikeout
-                , subscript
-                , superscript
-                , inlineNote  -- after superscript because of ^[link](/foo)^
-                , autoLink
-                , spanHtml
-                , rawHtmlInline
-                , escapedNewline
-                , escapedChar
-                , rawLaTeXInline'
-                , exampleRef
-                , smart
-                , return . B.singleton <$> charRef
-                , emoji
-                , symbol
-                , ltSign
-                ] <?> "inline"
+inline = do
+  c <- lookAhead anyChar
+  ((case c of
+     ' '     -> whitespace
+     '\t'    -> whitespace
+     '\n'    -> endline
+     '`'     -> code
+     '_'     -> strongOrEmph
+     '*'     -> strongOrEmph
+     '^'     -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^
+     '['     -> note <|> cite <|> bracketedSpan <|> link
+     '!'     -> image
+     '$'     -> math
+     '~'     -> strikeout <|> subscript
+     '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
+     '\\'    -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '@'     -> cite <|> exampleRef
+     '"'     -> smart
+     '\''    -> smart
+     '\8216' -> smart
+     '\145'  -> smart
+     '\8220' -> smart
+     '\147'  -> smart
+     '-'     -> smart
+     '.'     -> smart
+     '&'     -> return . B.singleton <$> charRef
+     ':'     -> emoji
+     _       -> mzero)
+   <|> bareURL
+   <|> str
+   <|> symbol) <?> "inline"
 
 escapedChar' :: PandocMonad m => MarkdownParser m Char
 escapedChar' = try $ do
@@ -1515,11 +1528,12 @@ escapedChar' = try $ do
      <|> oneOf "\\`*_{}[]()>#+-.!~\""
 
 escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedNewline = try $ do
+escapedNewline = do
   guardEnabled Ext_escaped_line_breaks
-  char '\\'
-  lookAhead (char '\n') -- don't consume the newline (see #3730)
-  return $ return B.linebreak
+  try $ do
+    char '\\'
+    lookAhead (char '\n') -- don't consume the newline (see #3730)
+    return $ return B.linebreak
 
 escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
 escapedChar = do
@@ -1541,19 +1555,20 @@ ltSign = do
 -- whole document has been parsed.  But we need this parser
 -- here in case citations is disabled.
 exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
+exampleRef = do
   guardEnabled Ext_example_lists
-  char '@'
-  lab <- mconcat . map T.pack <$>
-                    many (many1 alphaNum <|>
-                          try (do c <- char '_' <|> char '-'
-                                  cs <- many1 alphaNum
-                                  return (c:cs)))
-  return $ do
-    st <- askF
-    return $ case M.lookup lab (stateExamples st) of
-                  Just n  -> B.str $ tshow n
-                  Nothing -> B.str $ "@" <> lab
+  try $ do
+    char '@'
+    lab <- mconcat . map T.pack <$>
+                      many (many1 alphaNum <|>
+                            try (do c <- char '_' <|> char '-'
+                                    cs <- many1 alphaNum
+                                    return (c:cs)))
+    return $ do
+      st <- askF
+      return $ case M.lookup lab (stateExamples st) of
+                    Just n  -> B.str $ tshow n
+                    Nothing -> B.str $ "@" <> lab
 
 symbol :: PandocMonad m => MarkdownParser m (F Inlines)
 symbol = do
@@ -1580,10 +1595,10 @@ code = try $ do
                   >> count (length starts) (char '`')
                   >> notFollowedBy (char '`'))
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+         (guardEnabled Ext_inline_code_attributes >> try attributes))
   return $ return $
     case rawattr of
          Left syn   -> B.rawInline syn result
@@ -1676,38 +1691,40 @@ strikeout = fmap B.strikeout <$>
           strikeEnd   = try $ string "~~"
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript = do
   guardEnabled Ext_superscript
-  char '^'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '^'))
+  fmap B.superscript <$> try (do
+    char '^'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '^'))
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript = do
   guardEnabled Ext_subscript
-  char '~'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '~'))
+  fmap B.subscript <$> try (do
+    char '~'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '~'))
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
@@ -1790,15 +1807,16 @@ link = try $ do
   regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
 
 bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
+bracketedSpan = do
   guardEnabled Ext_bracketed_spans
-  (lab,_) <- reference
-  attr <- attributes
-  return $ if isSmallCaps attr
-              then B.smallcaps <$> lab
-              else if isUnderline attr
-                      then B.underline <$> lab
-                      else B.spanWith attr <$> lab
+  try $ do
+    (lab,_) <- reference
+    attr <- attributes
+    return $ if isSmallCaps attr
+                then B.smallcaps <$> lab
+                else if isUnderline attr
+                        then B.underline <$> lab
+                        else B.spanWith attr <$> lab
 
 -- | We treat a span as SmallCaps if class is "smallcaps" (and
 -- no other attributes are set or if style is "font-variant:small-caps"
@@ -1825,9 +1843,12 @@ regLink :: PandocMonad m
         -> MarkdownParser m (F Inlines)
 regLink constructor lab = try $ do
   (src, tit) <- source
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
+  let src' = if rebase then rebasePath pos src else src
   attr <- option nullAttr $
           guardEnabled Ext_link_attributes >> attributes
-  return $ constructor attr src tit <$> lab
+  return $ constructor attr src' tit <$> lab
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: PandocMonad m
@@ -1867,7 +1888,8 @@ referenceLink constructor (lab, raw) = do
                    Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
                    Nothing              -> makeFallback
             else makeFallback
-       Just ((src,tit), attr) -> constructor attr src tit <$> lab
+       Just ((src,tit), attr) ->
+           constructor attr src tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
@@ -1877,12 +1899,13 @@ dropBrackets = dropRB . dropLB
         dropLB xs                          = xs
 
 bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
+bareURL = do
   guardEnabled Ext_autolink_bare_uris
   getState >>= guard . stateAllowLinks
-  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
-  return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
+  try $ do
+    (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
+    notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
+    return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
 
 autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
 autoLink = try $ do
@@ -1899,15 +1922,33 @@ autoLink = try $ do
   return $ return $ B.linkWith attr (src <> escapeURI extra) ""
                      (B.str $ orig <> extra)
 
+-- | Rebase a relative path, by adding the (relative) directory
+-- of the containing source position.  Absolute links and URLs
+-- are untouched.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path = do
+  let fp = sourceName pos
+      isFragment = T.take 1 path == "#"
+      path' = T.unpack path
+      isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path'
+   in if T.null path || isFragment || isAbsolutePath || isURI path
+         then path
+         else
+           case takeDirectory fp of
+             ""  -> path
+             "." -> path
+             d   -> T.pack d <> "/" <> path
+
 image :: PandocMonad m => MarkdownParser m (F Inlines)
 image = try $ do
   char '!'
   (lab,raw) <- reference
   defaultExt <- getOption readerDefaultImageExtension
-  let constructor attr' src = case takeExtension (T.unpack src) of
-                          "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
-                                                          $ T.unpack defaultExt)
-                          _  -> B.imageWith attr' src
+  let constructor attr' src =
+         case takeExtension (T.unpack src) of
+            "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+                                            $ T.unpack defaultExt)
+            _  -> B.imageWith attr' src
   regLink constructor lab <|> referenceLink constructor (lab,raw)
 
 note :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1935,23 +1976,25 @@ note = try $ do
           return $ B.note $ walk adjustCite contents'
 
 inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
+inlineNote = do
   guardEnabled Ext_inline_notes
-  char '^'
-  updateState $ \st -> st{ stateInNote = True
-                         , stateNoteNumber = stateNoteNumber st + 1 }
-  contents <- inlinesInBalancedBrackets
-  updateState $ \st -> st{ stateInNote = False }
-  return $ B.note . B.para <$> contents
+  try $ do
+    char '^'
+    updateState $ \st -> st{ stateInNote = True
+                           , stateNoteNumber = stateNoteNumber st + 1 }
+    contents <- inlinesInBalancedBrackets
+    updateState $ \st -> st{ stateInNote = False }
+    return $ B.note . B.para <$> contents
 
 rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
+rawLaTeXInline' = do
   guardEnabled Ext_raw_tex
   notFollowedBy' rawConTeXtEnvironment
-  s <- rawLaTeXInline
-  return $ return $ B.rawInline "tex" s -- "tex" because it might be context
+  try $ do
+    s <- rawLaTeXInline
+    return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1960,7 +2003,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
@@ -1968,55 +2011,60 @@ inBrackets parser = do
   return $ "[" <> contents <> "]"
 
 spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
+spanHtml = do
   guardEnabled Ext_native_spans
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
-  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
-  let ident = fromMaybe "" $ lookup "id" attrs
-  let classes = maybe [] T.words $ lookup "class" attrs
-  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ if isSmallCaps (ident, classes, keyvals)
-              then B.smallcaps <$> contents
-              else if isUnderline (ident, classes, keyvals)
-                      then B.underline <$> contents
-                      else B.spanWith (ident, classes, keyvals) <$> contents
+  try $ do
+    (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+    contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
+    let ident = fromMaybe "" $ lookup "id" attrs
+    let classes = maybe [] T.words $ lookup "class" attrs
+    let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+    return $ if isSmallCaps (ident, classes, keyvals)
+                then B.smallcaps <$> contents
+                else if isUnderline (ident, classes, keyvals)
+                        then B.underline <$> contents
+                        else B.spanWith (ident, classes, keyvals) <$> contents
 
 divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
+divHtml = do
   guardEnabled Ext_native_divs
-  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-  -- we set stateInHtmlBlock so that closing tags that can be either block or
-  -- inline will not be parsed as inline tags
-  oldInHtmlBlock <- stateInHtmlBlock <$> getState
-  updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
-  bls <- option "" (blankline >> option "" blanklines)
-  contents <- mconcat <$>
-              many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
-  closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
-  if closed
-     then do
-       updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
-       let ident = fromMaybe "" $ lookup "id" attrs
-       let classes = maybe [] T.words $ lookup "class" attrs
-       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-       return $ B.divWith (ident, classes, keyvals) <$> contents
-     else -- avoid backtracing
-       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+  try $ do
+    (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+    -- we set stateInHtmlBlock so that closing tags that can be either block
+    -- or inline will not be parsed as inline tags
+    oldInHtmlBlock <- stateInHtmlBlock <$> getState
+    updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+    bls <- option "" (blankline >> option "" blanklines)
+    contents <- mconcat <$>
+                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+    closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
+    if closed
+       then do
+         updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+         let ident = fromMaybe "" $ lookup "id" attrs
+         let classes = maybe [] T.words $ lookup "class" attrs
+         let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+         return $ B.divWith (ident, classes, keyvals) <$> contents
+       else -- avoid backtracing
+         return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
 
 divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-divFenced = try $ do
+divFenced = do
   guardEnabled Ext_fenced_divs
-  string ":::"
-  skipMany (char ':')
-  skipMany spaceChar
-  attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
-  skipMany spaceChar
-  skipMany (char ':')
-  blankline
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
-  bs <- mconcat <$> manyTill block divFenceEnd
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
-  return $ B.divWith attribs <$> bs
+  try $ do
+    string ":::"
+    skipMany (char ':')
+    skipMany spaceChar
+    attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+    skipMany spaceChar
+    skipMany (char ':')
+    blankline
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+    bs <- mconcat <$> manyTill block divFenceEnd
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+    return $ B.divWith attribs <$> bs
 
 divFenceEnd :: PandocMonad m => MarkdownParser m ()
 divFenceEnd = try $ do
@@ -2048,14 +2096,15 @@ emojiChars :: [Char]
 emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
 
 emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
+emoji = do
   guardEnabled Ext_emoji
-  char ':'
-  emojikey <- many1Char (oneOf emojiChars)
-  char ':'
-  case emojiToInline emojikey of
-    Just i -> return (return $ B.singleton i)
-    Nothing -> mzero
+  try $ do
+    char ':'
+    emojikey <- many1Char (oneOf emojiChars)
+    char ':'
+    case emojiToInline emojikey of
+      Just i -> return (return $ B.singleton i)
+      Nothing -> mzero
 
 -- Citations
 
@@ -2074,7 +2123,7 @@ cite = do
 
 textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
 textualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey True
   -- If this is a reference to an earlier example list item,
   -- then don't parse it as a citation.  If the example list
   -- item comes later, we'll parse it here and figure out in
@@ -2154,7 +2203,7 @@ prefix = trimInlinesF . mconcat <$>
   manyTill inline (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
-                   citeKey
+                   citeKey True
                    return ']'))
 
 citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@@ -2163,7 +2212,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
 citation :: PandocMonad m => MarkdownParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey True
   suff <- suffix
   noteNum <- stateNoteNumber <$> getState
   return $ do
@@ -2182,28 +2231,30 @@ citation = try $ do
 smart :: PandocMonad m => MarkdownParser m (F Inlines)
 smart = do
   guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [apostrophe, dash, ellipses])
+  doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|>
+    (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses)
 
 singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
+singleQuoted = do
   singleQuoteStart
-  withQuoteContext InSingleQuote $
+  (try (withQuoteContext InSingleQuote $
     fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline singleQuoteEnd
+      many1Till inline singleQuoteEnd))
+    <|> (return (return (B.str "\8217")))
 
 -- doubleQuoted will handle regular double-quoted sections, as well
 -- as dialogues with an open double-quote without a close double-quote
 -- in the same paragraph.
 doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
+doubleQuoted = do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $
+  (try (withQuoteContext InDoubleQuote $
     fmap B.doubleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline doubleQuoteEnd
+      many1Till inline doubleQuoteEnd))
+    <|> (return (return (B.str "\8220")))
 
 toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
 
 toHeaderRow :: [Blocks] -> [Row]
-toHeaderRow l = [toRow l | not (null l)]
+toHeaderRow l = [toRow l | not (null l) && not (all null l)]
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e8985ab2c..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
@@ -112,12 +113,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
 isBlockTag' :: Tag Text -> Bool
 isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
+isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
 isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
 isBlockTag' tag = isBlockTag tag
 
 isInlineTag' :: Tag Text -> Bool
 isInlineTag' (TagComment _) = True
+isInlineTag' (TagClose "ref") = False -- see below inlineTag
 isInlineTag' t              = not (isBlockTag' t)
 
 eitherBlockOrInline :: [Text]
@@ -554,11 +557,17 @@ variable = try $ do
   contents <- manyTillChar anyChar (try $ string "}}}")
   return $ "{{{" <> contents <> "}}}"
 
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain bs =
+  case B.toList bs of
+    [Para ils] -> B.fromList [Plain ils]
+    _ -> bs
+
 inlineTag :: PandocMonad m => MWParser m Inlines
 inlineTag = do
   (tag, _) <- lookAhead $ htmlTag isInlineTag'
   case tag of
-       TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+       TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
        TagOpen "nowiki" _ -> try $ do
           (_,raw) <- htmlTag (~== tag)
           if T.any (== '/') raw
@@ -678,19 +687,17 @@ url = do
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
 inlinesBetween start end =
-  trimInlines . mconcat <$> try (start >> many1Till inner end)
-    where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace <* notFollowedBy' end
+  trimInlines . mconcat <$> try (start >> many1Till inline end)
 
 emph :: PandocMonad m => MWParser m Inlines
 emph = B.emph <$> nested (inlinesBetween start end)
-    where start = sym "''" >> lookAhead nonspaceChar
+    where start = sym "''"
           end   = try $ notFollowedBy' (() <$ strong) >> sym "''"
 
 strong :: PandocMonad m => MWParser m Inlines
 strong = B.strong <$> nested (inlinesBetween start end)
-    where start = sym "'''" >> lookAhead nonspaceChar
-          end   = try $ sym "'''"
+    where start = sym "'''"
+          end   = sym "'''"
 
 doubleQuotes :: PandocMonad m => MWParser m Inlines
 doubleQuotes = do
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..cbc523b25 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Metadata
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
 module Text.Pandoc.Readers.Metadata (
   yamlBsToMeta,
   yamlBsToRefs,
+  yamlMetaBlock,
   yamlMap ) where
 
 import Control.Monad
@@ -30,11 +31,13 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
+import qualified Data.Text.Lazy as TL
+import qualified Text.Pandoc.UTF8 as UTF8
 
-yamlBsToMeta :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text ParserState m (F Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -42,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do
        Right [] -> return . return $ mempty
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
+       -- the following is what we get from a comment:
+       Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
+                -> return . return $ mempty
        Right _  -> Prelude.fail "expected YAML object"
        Left (yamlpos, err')
                 -> do pos <- getPosition
@@ -63,11 +69,11 @@ lookupYAML t (YAML.Mapping _ _ m) =
 lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
-yamlBsToRefs :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text ParserState m (F [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -95,8 +101,12 @@ yamlBsToRefs pMetaValue idpred bstr =
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
        Right _  -> Prelude.fail "expecting YAML object"
-       Left (_pos, err')
-                -> Prelude.fail err'
+       Left (yamlpos, err')
+                -> do pos <- getPosition
+                      setPosition $ incSourceLine
+                            (setSourceColumn pos (YE.posColumn yamlpos))
+                            (YE.posLine yamlpos - 1)
+                      Prelude.fail err'
 
 
 nodeToKey :: YAML.Node YE.Pos -> Maybe Text
@@ -104,10 +114,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t))       = Just t
 nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
-normalizeMetaValue :: PandocMonad m
-                   => ParserT Text ParserState m (F MetaValue)
+normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text ParserState m (F MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -129,10 +139,10 @@ checkBoolean t
   | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
   | otherwise = Nothing
 
-yamlToMetaValue :: PandocMonad m
-                => ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text ParserState m (F MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -152,10 +162,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
   fmap MetaMap <$> yamlMap pMetaValue o
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
-yamlMap :: PandocMonad m
-        => ParserT Text ParserState m (F MetaValue)
+yamlMap :: (PandocMonad m, HasLastStrPosition st)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -171,3 +181,20 @@ yamlMap pMetaValue o = do
       return $ do
         v' <- fv
         return (k, v')
+
+-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+stopLine :: Monad m => ParserT Sources st m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b4eea9d3a..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,19 +36,20 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -13,20 +13,21 @@ Conversion of OPML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
-import Data.Char (toUpper)
 import Data.Default
-import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import qualified Data.Text.Lazy as TL
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.XML.Light
+import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
 
@@ -46,42 +47,27 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock ns)
+                 def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
     setAuthors (opmlDocAuthors st') $
     setDate (opmlDocDate st') $
     doc $ mconcat bs
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
 -- exceptT = either throwError return
@@ -105,11 +91,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
 parseBlock (Elem e) =
   case qName (elName e) of
         "ownerName"    -> mempty <$ modify (\st ->
-                              st{opmlDocAuthors = [text $ textContent e]})
+                              st{opmlDocAuthors = [text $ strContent e]})
         "dateModified" -> mempty <$ modify (\st ->
-                              st{opmlDocDate = text $ textContent e})
+                              st{opmlDocDate = text $ strContent e})
         "title"        -> mempty <$ modify (\st ->
-                              st{opmlDocTitle = text $ textContent e})
+                              st{opmlDocTitle = text $ strContent e})
         "outline" -> gets opmlSectionLevel >>= sect . (+1)
         "?xml"  -> return mempty
         _       -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 7b8bfd4b5..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt
@@ -15,7 +14,7 @@ Entry point to the odt reader.
 module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
+import Text.Pandoc.XML.Light
 
 import qualified Data.ByteString.Lazy as B
 
@@ -23,6 +22,8 @@ import System.FilePath
 
 import Control.Monad.Except (throwError)
 
+import qualified Data.Text as T
+
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -60,35 +61,37 @@ readOdt' _ bytes = bytesToOdt bytes-- of
 bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
 bytesToOdt bytes = case toArchiveOrFail bytes of
   Right archive -> archiveToOdt archive
-  Left _        -> Left $ PandocParseError "Couldn't parse odt file."
+  Left err      -> Left $ PandocParseError
+                        $ "Could not unzip ODT: " <> T.pack err
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
-  | Just contentEntry      <- findEntryByPath "content.xml" archive
-  , Just stylesEntry       <- findEntryByPath "styles.xml"  archive
-  , Just contentElem       <- entryToXmlElem contentEntry
-  , Just stylesElem        <- entryToXmlElem stylesEntry
-  , Right styles           <- chooseMax (readStylesAt stylesElem )
-                                        (readStylesAt contentElem)
-  , media                  <- filteredFilesFromArchive archive filePathIsOdtMedia
-  , startState             <- readerState styles media
-  , Right pandocWithMedia  <- runConverter' read_body
-                                            startState
-                                            contentElem
-
-  = Right pandocWithMedia
-
-  | otherwise
-    -- Not very detailed, but I don't think more information would be helpful
-  = Left $ PandocParseError "Couldn't parse odt file."
-    where
-      filePathIsOdtMedia :: FilePath -> Bool
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
+      onFailure _   (Just x) = Right x
+  contentEntry <- onFailure "Could not find content.xml"
+                   (findEntryByPath "content.xml" archive)
+  stylesEntry <- onFailure "Could not find styles.xml"
+                   (findEntryByPath "styles.xml" archive)
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
+               (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
+  let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
         let (dir, name) = splitFileName fp
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
+  let media = filteredFilesFromArchive archive filePathIsOdtMedia
+  let startState = readerState styles media
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
+    (runConverter' read_body startState contentElem)
 
 
 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
 import Control.Arrow
 import qualified Control.Category as Cat
 import Control.Monad
-
+import Data.List (foldl')
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 import Text.Pandoc.Readers.Odt.Generic.Fallible
 
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
 iterateSL :: (Foldable f, MonadPlus m)
           => ArrowState s    x     y
           -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
   where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
 
 
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE Arrows            #-}
+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE DeriveFoldable    #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE PatternGuards     #-}
@@ -29,14 +30,14 @@ import Control.Monad ((<=<))
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
 
 import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import Text.Pandoc.Builder hiding (underline)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
 
 type BlockMatcher  = ElementMatcher Blocks
 
-
-newtype FirstMatch a = FirstMatch (Option (First a))
-                     deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+  deriving (Foldable, Monoid, Semigroup)
 
 firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
 
 --
 matchingElement :: (Monoid e)
@@ -557,7 +556,7 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                        >>?% mappend
     --
     extractText     :: XML.Content -> Fallible T.Text
-    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -577,7 +576,10 @@ read_spaces       = matchingElement NsText "s" (
 read_line_break  :: InlineMatcher
 read_line_break   = matchingElement NsText "line-break"
                     $ returnV linebreak
-
+--
+read_tab         :: InlineMatcher
+read_tab          = matchingElement NsText "tab"
+                    $ returnV space
 --
 read_span        :: InlineMatcher
 read_span         = matchingElement NsText "span"
@@ -585,6 +587,7 @@ read_span         = matchingElement NsText "span"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -604,6 +607,7 @@ read_paragraph    = matchingElement NsText "p"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -630,6 +634,7 @@ read_header       = matchingElement NsText "h"
   children <- ( matchChildContent [ read_span
                                   , read_spaces
                                   , read_line_break
+                                  , read_tab
                                   , read_link
                                   , read_note
                                   , read_citation
@@ -777,14 +782,14 @@ read_frame_img =
       ""   -> returnV mempty -< ()
       src' -> do
         let exts = extensionsFromList [Ext_auto_identifiers]
-        resource   <- lookupResource                          -< src'
+        resource   <- lookupResource                          -< T.unpack src'
         _          <- updateMediaWithResource                 -< resource
         w          <- findAttrText' NsSVG "width"             -< ()
         h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +809,8 @@ read_frame_mathml =
     case fold src of
       ""   -> returnV mempty -< ()
       src' -> do
-        let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+        let path = T.unpack $
+                    fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
         (_, mathml) <- lookupResource -< path
         case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+  lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,15 +61,15 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
+import           Data.List (foldl')
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
-
 import           Text.Pandoc.Readers.Odt.Generic.Namespaces
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -292,7 +293,7 @@ readNSattributes         = fromState $ \state -> maybe (state, failEmpty     )
                          => XMLConverterState nsID extraState
                          -> Maybe (XMLConverterState nsID extraState)
     extractNSAttrs startState
-                         = foldl (\state d -> state >>= addNS d)
+                         = foldl' (\state d -> state >>= addNS d)
                                  (Just startState)
                                  nsAttribs
       where nsAttribs    = mapMaybe readNSattr (XML.elAttribs element)
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt.Namespaces
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
 module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
                                           ) where
 
-import Data.List (isPrefixOf)
 import qualified Data.Map as M (empty, insert)
 import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Pandoc.Readers.Odt.Generic.Namespaces
 
 
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
 
 
 findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
 
 nsIDmap :: NameSpaceIRIs Namespace
 nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
                  -- Core XML (basically only for the 'id'-attribute)
                | NsXML
                  -- Fallback
-               | NsOther String
+               | NsOther Text
   deriving ( Eq, Ord, Show )
 
 -- | Not the actual iri's, but large prefixes of them - this way there are
 -- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
 nsIDs = [
   ("urn:oasis:names:tc:opendocument:xmlns:animation"        , NsAnim         ),
   ("urn:oasis:names:tc:opendocument:xmlns:chart"            , NsChart        ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE Arrows          #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE TupleSections   #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.StyleReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -43,14 +44,16 @@ import Control.Arrow
 
 import Data.Default
 import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
 import qualified Data.Map as M
 import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
 import qualified Data.Set as S
 
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
 
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 
@@ -90,7 +93,7 @@ instance Default FontPitch where
 --
 -- Thus, we want
 
-type FontFaceName = String
+type FontFaceName = Text
 
 type FontPitches = M.Map FontFaceName FontPitch
 
@@ -117,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
                               &&&
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
-                    >>?^ ( M.fromList . foldl accumLegalPitches [] )
+                    >>?^ ( M.fromList . foldl' accumLegalPitches [] )
                   ) `ifFailedDo` returnV (Right M.empty)
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
@@ -151,7 +154,7 @@ findPitch =     ( lookupAttr NsStyle "font-pitch"
 -- Definitions of main data
 --------------------------------------------------------------------------------
 
-type StyleName        = String
+type StyleName        = Text
 
 -- | There are two types of styles: named styles with a style family and an
 -- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
   -- \^ simpler, but in general less efficient
 
 data ListLevelStyle = ListLevelStyle { listLevelType  :: ListLevelType
-                                     , listItemPrefix :: Maybe String
-                                     , listItemSuffix :: Maybe String
+                                     , listItemPrefix :: Maybe Text
+                                     , listItemSuffix :: Maybe Text
                                      , listItemFormat :: ListItemNumberFormat
                                      , listItemStart  :: Int
                                      }
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
   show ListLevelStyle{..} =    "<LLS|"
                             ++ show listLevelType
                             ++ "|"
-                            ++ maybeToString listItemPrefix
+                            ++ maybeToString (T.unpack <$> listItemPrefix)
                             ++ show listItemFormat
-                            ++ maybeToString listItemSuffix
+                            ++ maybeToString (T.unpack <$> listItemSuffix)
                             ++ ">"
     where maybeToString = fromMaybe ""
 
@@ -471,7 +474,7 @@ readTextProperties =
      )
   where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
         isFontBold = ("normal",False):("bold",True)
-                    :map ((,True).show) ([100,200..900]::[Int])
+                    :map ((,True) . tshow) ([100,200..900]::[Int])
 
 readUnderlineMode     :: StyleReaderSafe _x (Maybe UnderlineMode)
 readUnderlineMode     = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
 readStrikeThroughMode = readLineMode "text-line-through-mode"
                                      "text-line-through-style"
 
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
 readLineMode modeAttr styleAttr = proc x -> do
   isUL <- searchAttr  NsStyle styleAttr False isLinePresent -< x
   mode <- lookupAttr' NsStyle  modeAttr                     -< x
@@ -560,12 +563,13 @@ readListLevelStyle levelType =      readAttr NsText "level"
 
 --
 chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
-                                    | otherwise    = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
   where
-   select ( ListLevelStyle       t1            p1          s1          f1          b1 )
-          ( ListLevelStyle       t2            p2          s2          f2          _ )
-        =   ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+   select l Nothing = Just l
+   select ( ListLevelStyle t1 p1 s1 f1 b1 )
+          ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+        =   Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+                                  (selectLinf f1 f2) b1
    select' LltNumbered _           = LltNumbered
    select' _           LltNumbered = LltNumbered
    select' _           _           = LltBullet
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.BlockStarts
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f2e8b1ab6..f18d2f9a7 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,9 +1,10 @@
 {-# LANGUAGE FlexibleContexts  #-}
-{-# LANGUAGE RecordWildCards   #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards   #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -38,10 +39,12 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
-
+import Data.List.NonEmpty (nonEmpty)
+import System.FilePath
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
+import Text.Pandoc.Sources (ToSources(..))
 
 --
 -- parsing blocks
@@ -294,24 +297,22 @@ verseBlock blockType = try $ do
 codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
 codeBlock blockAttrs blockType = do
   skipSpaces
-  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  content           <- rawBlockContent blockType
-  resultsContent    <- option mempty babelResultsBlock
-  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
-  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
-  let labelledBlck   = maybe (pure codeBlck)
-                             (labelDiv codeBlck)
-                             (blockAttrCaption blockAttrs)
+  (classes, kv)  <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  content        <- rawBlockContent blockType
+  resultsContent <- option mempty babelResultsBlock
+  let identifier = fromMaybe mempty $ blockAttrName blockAttrs
+  let codeBlk    = B.codeBlockWith (identifier, classes, kv) content
+  let wrap       = maybe pure addCaption (blockAttrCaption blockAttrs)
   return $
-    (if exportsCode kv    then labelledBlck   else mempty) <>
+    (if exportsCode kv    then wrap codeBlk   else mempty) <>
     (if exportsResults kv then resultsContent else mempty)
  where
-   labelDiv :: Blocks -> F Inlines -> F Blocks
-   labelDiv blk value =
-     B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+   addCaption :: F Inlines -> Blocks -> F Blocks
+   addCaption caption blk = B.divWith ("", ["captioned-content"], [])
+                         <$> (mkCaptionBlock caption <> pure blk)
 
-   labelledBlock :: F Inlines -> F Blocks
-   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+   mkCaptionBlock :: F Inlines -> F Blocks
+   mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain)
 
    exportsResults :: [(Text, Text)] -> Bool
    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
@@ -527,7 +528,9 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFileF blocksParser ["."] filename
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile blocksParser toSources
+                     [currentDir] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
@@ -543,8 +546,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         -- CAVE: partial function in else
-                         curMin = if null levels then 0 else minimum levels
+                         curMin = maybe 0 minimum $ nonEmpty levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
@@ -852,16 +854,52 @@ definitionListItem parseIndentedMarker = try $ do
    definitionMarker =
      spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
 
+-- | Checkbox for tasks.
+data Checkbox
+  = UncheckedBox
+  | CheckedBox
+  | SemicheckedBox
+
+-- | Parses a checkbox in a plain list.
+checkbox :: PandocMonad m
+         => OrgParser m Checkbox
+checkbox = do
+  guardEnabled Ext_task_lists
+  try (char '[' *> status <* char ']') <?> "checkbox"
+  where
+    status = choice
+      [ UncheckedBox   <$ char ' '
+      , CheckedBox     <$ char 'X'
+      , SemicheckedBox <$ char '-'
+      ]
+
+checkboxToInlines :: Checkbox -> Inline
+checkboxToInlines = B.Str . \case
+  UncheckedBox   -> "☐"
+  SemicheckedBox -> "☐"
+  CheckedBox     -> "☒"
+
 -- | parse raw text for one list item
 listItem :: PandocMonad m
          => OrgParser m Int
          -> OrgParser m (F Blocks)
 listItem parseIndentedMarker = try . withContext ListItemState $ do
   markerLength <- try parseIndentedMarker
+  box <- optionMaybe checkbox
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   rest <- T.concat <$> many (listContinuation markerLength)
-  parseFromString blocks $ firstLine <> blank <> rest
+  contents <- parseFromString blocks $ firstLine <> blank <> rest
+  return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
+
+-- | Prepend inlines to blocks, adding them to the first paragraph or
+-- creating a new Plain element if necessary.
+prependInlines :: Inline -> Blocks -> Blocks
+prependInlines inlns = B.fromList . prepend . B.toList
+  where
+    prepend (Plain is : bs) = Plain (inlns : Space : is) : bs
+    prepend (Para  is : bs) = Para  (inlns : Space : is) : bs
+    prepend bs              = Plain [inlns, Space] : bs
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ExportSettings
-   Copyright   : © 2016–2020 Albert Krewinkel
+   Copyright   : © 2016-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b234bee58..6862dd71e 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Inlines
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -262,7 +263,7 @@ berkeleyCitationList = try $ do
  where
    citationListPart :: PandocMonad m => OrgParser m (F Inlines)
    citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
-     notFollowedBy' citeKey
+     notFollowedBy' $ citeKey False
      notFollowedBy (oneOf ";]")
      inline
 
@@ -277,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
 
 berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
 berkeleyTextualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey False
   returnF . return $ Citation
     { citationId      = key
     , citationPrefix  = mempty
@@ -322,7 +323,7 @@ linkLikeOrgRefCite = try $ do
 -- from the `org-ref-cite-re` variable in `org-ref.el`.
 orgRefCiteKey :: PandocMonad m => OrgParser m Text
 orgRefCiteKey =
-  let citeKeySpecialChars = "-_:\\./," :: String
+  let citeKeySpecialChars = "-_:\\./" :: String
       isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
       isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
       endOfCitation = try $ do
@@ -350,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
 citation :: PandocMonad m => OrgParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey False
   suff <- suffix
   return $ do
     x <- pref
@@ -367,7 +368,7 @@ citation = try $ do
       }
  where
    prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+            manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
    suffix = try $ do
      hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
      skipSpaces
@@ -477,17 +478,17 @@ linkToInlinesF linkStr =
 
 internalLink :: Text -> Inlines -> F Inlines
 internalLink link title = do
-  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
-  if anchorB
+  ids <- asksF orgStateAnchorIds
+  if link `elem` ids
     then return $ B.link ("#" <> link) "" title
-    else return $ B.emph title
+    else let attr' = ("", ["spurious-link"] , [("target", link)])
+         in return $ B.spanWith attr' (B.emph title)
 
 -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
 -- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
 -- @org-target-regexp@, which is fairly liberal.  Since no link is created if
 -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
 -- an anchor.
-
 anchor :: PandocMonad m => OrgParser m (F Inlines)
 anchor =  try $ do
   anchorId <- parseAnchor
@@ -501,7 +502,6 @@ anchor =  try $ do
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
-
 solidify :: Text -> Text
 solidify = T.map replaceSpecialChar
  where replaceSpecialChar c
@@ -573,7 +573,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines)
 underline = fmap B.underline    <$> emphasisBetween '_'
 
 verbatim  :: PandocMonad m => OrgParser m (F Inlines)
-verbatim  = return . B.code     <$> verbatimBetween '='
+verbatim  = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
 
 code      :: PandocMonad m => OrgParser m (F Inlines)
 code      = return . B.code     <$> verbatimBetween '~'
@@ -803,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..a1b21046a 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Meta
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -239,7 +239,7 @@ lineOfInlines = do
 todoSequence :: Monad m => OrgParser m TodoSequence
 todoSequence = try $ do
   todoKws <- todoKeywords
-  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+  doneKws <- optionMaybe $ todoDoneSep *> doneKeywords
   newline
   -- There must be at least one DONE keyword. The last TODO keyword is
   -- taken if necessary.
@@ -250,11 +250,17 @@ todoSequence = try $ do
                     (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
 
  where
+   todoKeyword :: Monad m => OrgParser m Text
+   todoKeyword = many1Char nonspaceChar <* skipSpaces
+
    todoKeywords :: Monad m => OrgParser m [Text]
    todoKeywords = try $
-     let keyword = many1Char nonspaceChar <* skipSpaces
-         endOfKeywords = todoDoneSep <|> void newline
-     in manyTill keyword (lookAhead endOfKeywords)
+     let endOfKeywords = todoDoneSep <|> void newline
+     in manyTill todoKeyword (lookAhead endOfKeywords)
+
+   doneKeywords :: Monad m => OrgParser m [Text]
+   doneKeywords = try $
+     manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline)
 
    todoDoneSep :: Monad m => OrgParser m ()
    todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ParserState
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..f0949e205 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.Org.Parsing
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , ellipses
   , citeKey
   , gridTableWith
-  , insertIncludedFileF
-  -- * Re-exports from Text.Pandoc.Parsec
+  , insertIncludedFile
   , runParser
   , runParserT
   , getInput
@@ -100,21 +99,22 @@ module Text.Pandoc.Readers.Org.Parsing
   , getState
   , updateState
   , SourcePos
+  , sourceName
   , getPosition
   ) where
 
 import Data.Text (Text)
 import Text.Pandoc.Readers.Org.ParserState
 
-import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
-                            parseFromString)
+import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
+                                   parseFromString)
 import qualified Text.Pandoc.Parsing as P
 
 import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Shared
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..3990f0cb5 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -27,8 +27,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getCurrentTime)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -38,25 +37,25 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
+import System.FilePath (takeDirectory)
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -446,64 +453,43 @@ encoding
 -}
 
 includeDirective :: PandocMonad m
-                 => Text -> [(Text, Text)] -> Text
+                 => Text
+                 -> [(Text, Text)]
+                 -> Text
                  -> RSTParser m Blocks
 includeDirective top fields body = do
-  let f = trim top
-  guard $ not (T.null f)
+  let f = T.unpack $ trim top
+  guard $ not $ null f
   guard $ T.null (trim body)
-  -- options
-  let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
-  let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
-  oldPos <- getPosition
-  oldInput <- getInput
-  containers <- stateContainers <$> getState
-  when (f `elem` containers) $
-    throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
-  updateState $ \s -> s{ stateContainers = f : stateContainers s }
-  mbContents <- readFileFromDirs ["."] $ T.unpack f
-  contentLines <- case mbContents of
-                       Just s -> return $ T.lines s
-                       Nothing -> do
-                         logMessage $ CouldNotLoadIncludeFile f oldPos
-                         return []
-  let numLines = length contentLines
-  let startLine' = case startLine of
-                        Nothing            -> 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let endLine' = case endLine of
-                        Nothing            -> numLines + 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let contentLines' =   drop (startLine' - 1)
-                      $ take (endLine' - 1) contentLines
-  let contentLines'' = (case trim <$> lookup "end-before" fields of
-                             Just patt -> takeWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) .
-                       (case trim <$> lookup "start-after" fields of
-                             Just patt -> drop 1 .
-                                            dropWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) $ contentLines'
-  let contents' = T.unlines contentLines''
-  case lookup "code" fields of
-       Just lang -> do
-         let classes =  maybe [] T.words (lookup "class" fields)
-         let ident = maybe "" trimr $ lookup "name" fields
-         codeblock ident classes fields (trimr lang) contents' False
-       Nothing   -> case lookup "literal" fields of
-                         Just _  -> return $ B.rawBlock "rst" contents'
-                         Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
-                           updateState $ \s -> s{ stateContainers =
-                                         tail $ stateContainers s }
-                           return bs
-
+  let startLine = lookup "start-line" fields >>= safeRead
+  let endLine = lookup "end-line" fields >>= safeRead
+  let classes =  maybe [] T.words (lookup "class" fields)
+  let ident = maybe "" trimr $ lookup "name" fields
+  let parser =
+       case lookup "code" fields of
+         Just lang ->
+           (codeblock ident classes fields (trimr lang) False
+            . sourcesToText) <$> getInput
+         Nothing   ->
+           case lookup "literal" fields of
+             Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
+             Nothing -> parseBlocks
+  let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
+  let selectLines =
+        (case trim <$> lookup "end-before" fields of
+                         Just patt -> takeWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id) .
+        (case trim <$> lookup "start-after" fields of
+                         Just patt -> drop 1 .
+                                        dropWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id)
+
+  let toStream t =
+        Sources [(initialPos f,
+                   (T.unlines . selectLines . T.lines $ t) <>
+                    if isLiteral then mempty else "\n")]  -- see #7436
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile parser toStream [currentDir] f startLine endLine
 
 --
 -- list blocks
@@ -526,7 +512,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -613,8 +599,9 @@ comment = try $ do
   string ".."
   skipMany1 spaceChar <|> (() <$ lookAhead newline)
   -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
-  manyTill anyChar blanklines
+  _ <- anyLine
   optional indentedBlock
+  optional blanklines
   return mempty
 
 directiveLabel :: Monad m => RSTParser m Text
@@ -685,7 +672,7 @@ directive' = do
         "replace" -> B.para <$>  -- consumed by substKey
                    parseInlineFromText (trim top)
         "date" -> B.para <$> do  -- consumed by substKey
-                     t <- getCurrentTime
+                     t <- getTimestamp
                      let format = case T.unpack (T.strip top) of
                                     [] -> "%Y-%m-%d"
                                     x  -> x
@@ -731,8 +718,8 @@ directive' = do
                                      ""   -> stateRstHighlight def
                                      lang -> Just lang })
         x | x == "code" || x == "code-block" || x == "sourcecode" ->
-          codeblock name classes (map (second trimr) fields)
-             (trim top) body True
+          return $ codeblock name classes (map (second trimr) fields)
+             (trim top) True body
         "aafig" -> do
           let attribs = (name, ["aafig"], map (second trimr) fields)
           return $ B.codeBlockWith attribs $ stripTrailingNewlines body
@@ -877,10 +864,11 @@ csvTableDirective top fields rawcsv = do
                     (bs, _) <- fetchItem u
                     return $ UTF8.toText bs
                   Nothing -> return rawcsv
-  let res = parseCSV opts (case explicitHeader of
-                              Just h  -> h <> "\n" <> rawcsv'
-                              Nothing -> rawcsv')
-  case res of
+  let header' = case explicitHeader of
+                  Just h  -> parseCSV defaultCSVOptions h
+                  Nothing -> Right []
+  let res = parseCSV opts rawcsv'
+  case (<>) <$> header' <*> res of
        Left e  ->
          throwError $ PandocParsecError "csv table" e
        Right rawrows -> do
@@ -1017,10 +1005,10 @@ toChunks = dropWhile T.null
                           then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
                           else s
 
-codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool
-          -> RSTParser m Blocks
-codeblock ident classes fields lang body rmTrailingNewlines =
-  return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
+codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text
+          -> Blocks
+codeblock ident classes fields lang rmTrailingNewlines body =
+  B.codeBlockWith attribs $ stripTrailingNewlines' body
     where stripTrailingNewlines' = if rmTrailingNewlines
                                      then stripTrailingNewlines
                                      else id
@@ -1101,7 +1089,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1120,7 +1108,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1158,8 +1146,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- we need to ensure that the keys are ordered by occurrence in
+  -- the document.
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.pack (show numKeys)
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1248,13 +1238,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1380,7 +1370,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
@@ -1656,21 +1646,4 @@ note = try $ do
       return $ B.note contents
 
 smart :: PandocMonad m => RSTParser m Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote $
-    B.singleQuoted . trimInlines . mconcat <$>
-      many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  withQuoteContext InDoubleQuote $
-    B.doubleQuoted . trimInlines . mconcat <$>
-      many1Till inline doubleQuoteEnd
+smart = smartPunctuation inline
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 484a6c923..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
@@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines
 symbol = B.str <$> countChar 1 nonspaceChar
 
 smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [ apostrophe
-           , dash
-           , ellipses
-           ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote
-    (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  withQuoteContext InDoubleQuote (doubleQuoteEnd >>
-   return (B.doubleQuoted $ B.trimInlines contents))
-   <|> return (B.str "\8220" B.<> contents)
+smart = smartPunctuation inline
 
 link :: PandocMonad m => TWParser m B.Inlines
 link = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Textile
    Copyright   : Copyright (C) 2010-2012 Paul Rivier
-                               2010-2020 John MacFarlane
+                               2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
@@ -11,7 +11,7 @@
    Portability : portable
 
 Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
+available at https://www.promptworks.com/textile/.
 
 Implemented and parsed:
  - Paragraphs
@@ -38,7 +38,8 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
 import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -52,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -83,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -101,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -120,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -155,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -173,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -185,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -207,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -249,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -280,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -299,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -307,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -325,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -338,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -349,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -359,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -375,8 +380,9 @@ table = try $ do
                              (toprow:rest) | any (fst . fst) toprow ->
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
-  let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+  let nbOfCols = maximum $ fmap length (headers :| rows)
+  let aligns = map (maybe AlignDefault minimum . nonEmpty) $
+                transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
   return $ B.table (B.simpleCaption $ B.plain caption)
@@ -386,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -395,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -407,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -421,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -445,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -459,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -505,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -520,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -533,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -545,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -576,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -588,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes :: PandocMonad m => TextileParser m Attr
+attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -641,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -657,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -668,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -698,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -707,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 474e4fac0..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
 {- |
    Module      : Text.Pandoc.Readers.Txt2Tags
    Copyright   : Copyright (C) 2014 Matthew Pickering
@@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader (Reader, asks, runReader)
 import Data.Default
 import Data.List (intercalate, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -31,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -53,25 +55,28 @@ getT2TMeta = do
     inps <- P.getInputFiles
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
-    let getModTime = fmap (formatTime defaultTimeLocale "%T") .
-                       P.getModificationTime
-    curMtime <- case inps of
-                  [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
-                  _ -> catchError
-                        (maximum <$> mapM getModTime inps)
-                        (const (return ""))
-    return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
+    curMtime <- catchError
+                 (mapM P.getModificationTime inps >>=
+                   (\case
+                       Nothing ->
+                         formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+                       Just ts -> return $
+                         formatTime defaultTimeLocale "%T" $ maximum ts)
+                    . nonEmpty)
+                (const (return ""))
+    return $ T2TMeta (T.pack curDate) (T.pack curMtime)
+                     (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -261,9 +266,9 @@ table = try $ do
   rows <- many1 (many commentLine *> tableRow)
   let columns = transpose rows
   let ncolumns = length columns
-  let aligns = map (foldr1 findAlign . map fst) columns
+  let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
-  let size = maximum (map length rows')
+  let size = maybe 0 maximum $ nonEmpty $ map length rows'
   let rowsPadded = map (pad size) rows'
   let headerPadded = if null tableHeader then mempty else pad size tableHeader
   let toRow = Row nullAttr . map B.simpleCell
@@ -278,10 +283,11 @@ pad :: (Monoid a) => Int -> [a] -> [a]
 pad n xs = xs ++ replicate (n - length xs) mempty
 
 
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
-  | x == y = x
-  | otherwise = AlignDefault
+findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment
+findAlign (x,_) (Just y)
+  | x == y = Just x
+  | otherwise = Just AlignDefault
+findAlign (x,_) Nothing = Just x
 
 headerRow :: T2T [(Alignment, Blocks)]
 headerRow = genericRow (string "||")
@@ -472,9 +478,29 @@ macro = try $ do
 -- raw URLs in text are automatically linked
 url :: T2T Inlines
 url = try $ do
-  (rawUrl, escapedUrl) <- try uri <|> emailAddress
+  (rawUrl, escapedUrl) <- try uri <|> emailAddress'
   return $ B.link rawUrl "" (B.str escapedUrl)
 
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+  (base, mailURI) <- emailAddress
+  query <- option "" emailQuery
+  return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+  char '?'
+  parts <- kv `sepBy1` (char '&')
+  return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+  k <- T.pack <$> many1 alphaNum
+  char '='
+  let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+  v <- T.pack <$> many1 vchar
+  return (k <> "=" <> v)
+
 uri :: T2T (Text, Text)
 uri = try $ do
   address <- t2tURI
@@ -564,7 +590,7 @@ getTarget = do
               _                               -> "html"
 
 atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+atStart = getPosition >>= guard . (== 1) . sourceColumn
 
 ignoreSpacesCap :: T2T Text -> T2T Text
 ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants